list.of.packages <- c("dplyr", "compositions", "finalfit", "stats", "lmtest")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
tbdar = read.delim("~/Sinergia/Analysis/Human/Ancestry/Data/interim/metadata_Sinergia_final_all_pats_morePopulations.txt",
sep = "\t")
tbdar$Introduction <- ifelse(tbdar$Introduction %in% c("Intro 1", "Intro 5", "Intro 9", "Intro 10", "other"), tbdar$Introduction, NA)
tbdar$Intro10 <- ifelse(tbdar$Introduction == "Intro 10", 1, ifelse(tbdar$Introduction %in% c("Intro 1", "Intro 5", "Intro 9", "other"), 0, NA))
# create category for TB score
tbdar$TB_score_category <- ifelse(tbdar$TB_score < 6, "mild", ifelse(tbdar$TB_score > 5 & tbdar$TB_score < 8, "moderate", ifelse(tbdar$TB_score > 7, "severe", "somethingelse")))
tbdar$TB_score_2cat <- ifelse(tbdar$TB_score_category == "mild", "mild", ifelse(tbdar$TB_score_category %in% c("moderate", "severe"), "severe", NA))
tbdar$TB_category <- ifelse(tbdar$TB_category == "relapse", "relapse", ifelse(tbdar$TB_category == "new", "new", NA))
tbdar$Intro10 <- as.factor(tbdar$Intro10)
tbdar$DR_status <- ifelse(tbdar$DR_status %in% c("INH-Mono", "MDR", "Susceptible"), tbdar$DR_status, NA)
# age
hist(tbdar$age)
tbdar$age_cat <- ifelse(tbdar$age < 30, 1, ifelse(tbdar$age >= 30 & tbdar$age < 40, 2, ifelse(tbdar$age >= 40 & tbdar$age < 50, 3, ifelse(tbdar$age >= 50, 4, NA))))
# add whether they are patients or control
tbdar$Type <- ifelse(tbdar$PATIENT_ID >= 83000 & !tbdar$Lineage %in% c("L1", "L2", "L4", "L3"), "control", "patient")
# make a column containing the common tribes
tbdar$common_tribes <- ifelse(unname(table(tbdar$patient_tribe)[tbdar$patient_tribe]) > 100, tbdar$patient_tribe, "other") # at least 100 patients with that tribe
tbdar$socio_cat <- ifelse(tbdar$socioeconomic_status < 100, 1, ifelse(tbdar$socioeconomic_status >= 100 & tbdar$socioeconomic_status < 200, 2, ifelse(tbdar$socioeconomic_status >= 200, 3, NA)))
# create a category for X-ray score:
tbdar$Xray_score_category_ralph <- ifelse(tbdar$Xray_score < 71, "mild", ifelse(tbdar$Xray_score >= 71, "severe", NA))
tbdar$Xray_01 <- ifelse(tbdar$Xray_score_category_ralph == "mild", 0, ifelse(tbdar$Xray_score_category_ralph == "severe", 1, NA))
unique(tbdar$Xray_score_category_ralph)
## [1] NA "mild" "severe"
tbdar$Xray_score_category_ralph <- as.factor(tbdar$Xray_score_category_ralph)
# categorize the cough duration
tbdar$cough_cat <- ifelse(tbdar$symptoms_duration_cough_duration >= 4, 3, ifelse(tbdar$symptoms_duration_cough_duration < 4 & tbdar$symptoms_duration_cough_duration >= 3, 2, ifelse(tbdar$symptoms_duration_cough_duration < 3 & tbdar$symptoms_duration_cough_duration >= 0, 1, NA)))
# modify the empty HIV status to NA
tbdar$HIV_status <- ifelse(tbdar$HIV_status %in% c("infected", "positive"), "infected", ifelse(tbdar$HIV_status == "negative", "negative", NA))
unique(tbdar$HIV_status)
## [1] "negative" "infected" NA
tbdar$TB_RF_smoking <- ifelse(tbdar$TB_RF_smoking == "no", "no", ifelse(tbdar$TB_RF_smoking == "yes", "yes", NA))
# add whether they are patients or control
# categories for age
tbdar$age_cat <- ifelse(tbdar$age < 30, 1, ifelse(tbdar$age >= 30 & tbdar$age < 40, 2, ifelse(tbdar$age >= 40 & tbdar$age < 50, 3, ifelse(tbdar$age >= 50, 4, NA))))
tbdar$DR_category <- ifelse(tbdar$DR_status %in% c("INH-Mono", "MDR"), "resistant", ifelse(tbdar$DR_status =="Susceptible", "susceptible", NA))
# turn all of them into a factor:
tbdar$TB_score_category <- as.factor(tbdar$TB_score_category)
tbdar$Xray_score_category <- as.factor(tbdar$Xray_score_category)
tbdar$patient_sex <- as.factor(tbdar$patient_sex)
tbdar$HIV_status <- as.factor(tbdar$HIV_status)
tbdar$TB_RF_smoking <- as.factor(tbdar$TB_RF_smoking)
tbdar$Xray_01 <- as.factor(tbdar$Xray_01)
tbdar$general_examination_malnutrition <- as.factor(tbdar$general_examination_malnutrition)
tbdar$Education <- as.factor(tbdar$Education)
tbdar$Type <- as.factor(tbdar$Type)
tbdar$DR_status <- as.factor(tbdar$DR_status)
tbdar$DR_category <- as.factor(tbdar$DR_category)
tbdar$age_cat <- as.factor(tbdar$age_cat)
tbdar$cough_cat <- as.factor(tbdar$cough_cat)
tbdar$TB_score_2cat <- as.factor(tbdar$TB_score_2cat)
tbdar$TB_category <- as.factor(tbdar$TB_category)
# relevel some factors:
tbdar$patient_sex <- relevel(tbdar$patient_sex, "male")
tbdar$HIV_status <- relevel(tbdar$HIV_status, "negative")
tbdar$Xray_01 <- relevel(tbdar$Xray_01, 1)
tbdar$TB_score_2cat <- relevel(tbdar$TB_score_2cat, "severe")
tbdar$DR_status <- relevel(tbdar$DR_status, "Susceptible")
tbdar$DR_category <- relevel(tbdar$DR_category, "susceptible")
tbdar$Education <- relevel(tbdar$Education, "primary") # most patients have primary education
tbdar$general_examination_malnutrition <- relevel(tbdar$general_examination_malnutrition, "no")
tbdar$other <- 1-(tbdar$Western_Bantu + tbdar$Southeastern_Bantu + tbdar$Eastern_Bantu)
tbdar$socio_cat <- as.factor(tbdar$socio_cat)
# Table 1 Manuscript
library(finalfit)
library(readr)
# only include patients with either a bacterial or a human genome
tbdar_subset <- subset(tbdar, Introduction %in% c("other", "Intro 1", "Intro 5", "Intro 9", "Intro 10") | Nigerian > 0, select=c("Xray_01", "Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other"))
explanatory = c("Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other")
dependent = "Xray_01"
tbdar_subset %>%
summary_factorlist(dependent = dependent, explanatory, add_col_totals = T, add_row_totals = T, include_row_missing_col = T, p = F, na_include_dependent = TRUE) -> t1
t1
## label Total N Missing N levels 0 1
## Total N (%) 849 (44.6) 177 (9.3)
## Introduction 764 (74.5) 262 Intro 1 28 (4.4) 7 (5.3)
## Intro 10 239 (37.8) 59 (44.7)
## Intro 5 38 (6.0) 4 (3.0)
## Intro 9 58 (9.2) 11 (8.3)
## other 269 (42.6) 51 (38.6)
## Southeastern_Bantu 840 (81.9) 186 Mean (SD) 0.4 (0.2) 0.5 (0.1)
## Eastern_Bantu 840 (81.9) 186 Mean (SD) 0.2 (0.1) 0.2 (0.1)
## Western_Bantu 840 (81.9) 186 Mean (SD) 0.1 (0.1) 0.1 (0.1)
## other 840 (81.9) 186 Mean (SD) 0.2 (0.1) 0.2 (0.1)
## (Missing)
## 878 (46.1)
## 36 (5.1)
## 274 (38.8)
## 44 (6.2)
## 60 (8.5)
## 293 (41.4)
## 0.4 (0.2)
## 0.2 (0.1)
## 0.1 (0.1)
## 0.2 (0.1)
# Table 1 Manuscript
library(finalfit)
library(readr)
# only include patients with either a bacterial or a human genome
tbdar_subset <- subset(tbdar, Introduction %in% c("other", "Intro 1", "Intro 5", "Intro 9", "Intro 10") | Nigerian > 0, select=c("TB_score_2cat", "Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other"))
explanatory = c("Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other")
dependent = "TB_score_2cat"
tbdar_subset %>%
summary_factorlist(dependent = dependent, explanatory, add_col_totals = T, add_row_totals = T, include_row_missing_col = T, p = F, na_include_dependent = TRUE) -> t1
t1
## label Total N Missing N levels severe mild
## Total N (%) 624 (32.8) 1280 (67.2)
## Introduction 1471 (77.3) 433 Intro 1 22 (4.5) 49 (5.0)
## Intro 10 184 (37.6) 388 (39.6)
## Intro 5 26 (5.3) 60 (6.1)
## Intro 9 51 (10.4) 78 (8.0)
## other 207 (42.2) 406 (41.4)
## Southeastern_Bantu 1442 (75.7) 462 Mean (SD) 0.4 (0.2) 0.4 (0.2)
## Eastern_Bantu 1442 (75.7) 462 Mean (SD) 0.2 (0.1) 0.2 (0.1)
## Western_Bantu 1442 (75.7) 462 Mean (SD) 0.1 (0.1) 0.1 (0.1)
## other 1442 (75.7) 462 Mean (SD) 0.2 (0.1) 0.2 (0.1)
## (Missing)
## 0 (0.0)
##
##
##
##
##
##
##
##
##
# Table 1 Manuscript
library(finalfit)
library(readr)
# only include patients with either a bacterial or a human genome
tbdar_subset <- subset(tbdar, Introduction %in% c("other", "Intro 1", "Intro 5", "Intro 9", "Intro 10") | Nigerian > 0, select=c("Ct_value", "Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other"))
explanatory = c("Introduction", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other")
dependent = "Ct_value"
tbdar_subset %>%
summary_factorlist(dependent = dependent, explanatory, add_row_totals = T, include_row_missing_col = T, p = F, na_include_dependent = TRUE) -> t1
t1
## label Total N Missing N levels unit value
## Introduction 863 (78.3) 239 Intro 1 Mean (sd) 20.5 (5.1)
## Intro 10 Mean (sd) 19.1 (4.7)
## Intro 5 Mean (sd) 19.5 (4.1)
## Intro 9 Mean (sd) 19.1 (4.7)
## other Mean (sd) 19.4 (4.9)
## Southeastern_Bantu 810 (73.5) 292 [0.0,0.7] Mean (sd) 19.9 (5.2)
## Eastern_Bantu 810 (73.5) 292 [0.1,0.4] Mean (sd) 19.9 (5.2)
## Western_Bantu 810 (73.5) 292 [0.0,0.4] Mean (sd) 19.9 (5.2)
## other 810 (73.5) 292 [0.1,0.9] Mean (sd) 19.9 (5.2)
library(finalfit)
library(readr)
# only include patients with either a bacterial or a human genome
tbdar_subset <- subset(tbdar, Introduction %in% c("other", "Intro 1", "Intro 5", "Intro 9", "Intro 10") | Nigerian > 0, select=c("Introduction", "patient_sex", "age", "HIV_status", "TB_RF_smoking", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other", "symptoms_duration_cough_duration", "socioeconomic_status", "Education", "general_examination_malnutrition", "TB_category", "DR_status"))
explanatory = c("patient_sex", "age", "HIV_status", "TB_RF_smoking", "symptoms_duration_cough_duration", "socioeconomic_status","Education", "general_examination_malnutrition", "TB_category", "DR_status", "Southeastern_Bantu", "Eastern_Bantu", "Western_Bantu", "other")
dependent = "Introduction"
tbdar_subset %>%
summary_factorlist(dependent = dependent, explanatory, add_col_totals = T, add_row_totals = T, include_row_missing_col = T, p = F, na_include_dependent = TRUE) -> t1
t1
## label Total N Missing N levels
## Total N (%)
## patient_sex 1471 (100.0) 0 male
## female
## age 1471 (100.0) 0 Mean (SD)
## HIV_status 1452 (98.7) 19 negative
## infected
## TB_RF_smoking 1462 (99.4) 9 no
## yes
## symptoms_duration_cough_duration 1454 (98.8) 17 Mean (SD)
## socioeconomic_status 1471 (100.0) 0 Mean (SD)
## Education 1471 (100.0) 0 primary
## no
## secondary
## university
## general_examination_malnutrition 1471 (100.0) 0 no
## yes
## TB_category 1447 (98.4) 24 new
## relapse
## DR_status 1471 (100.0) 0 Susceptible
## INH-Mono
## MDR
## Southeastern_Bantu 1009 (68.6) 462 Mean (SD)
## Eastern_Bantu 1009 (68.6) 462 Mean (SD)
## Western_Bantu 1009 (68.6) 462 Mean (SD)
## other 1009 (68.6) 462 Mean (SD)
## Intro 1 Intro 10 Intro 5 Intro 9 other
## 71 (3.7) 572 (30.0) 86 (4.5) 129 (6.8) 613 (32.2)
## 52 (73.2) 425 (74.3) 55 (64.0) 87 (67.4) 424 (69.2)
## 19 (26.8) 147 (25.7) 31 (36.0) 42 (32.6) 189 (30.8)
## 32.0 (8.4) 34.6 (10.5) 33.8 (11.0) 36.0 (12.4) 34.6 (10.4)
## 60 (85.7) 462 (81.8) 67 (77.9) 95 (75.4) 515 (85.1)
## 10 (14.3) 103 (18.2) 19 (22.1) 31 (24.6) 90 (14.9)
## 52 (74.3) 411 (72.5) 74 (87.1) 103 (79.8) 482 (78.9)
## 18 (25.7) 156 (27.5) 11 (12.9) 26 (20.2) 129 (21.1)
## 3.7 (2.0) 3.8 (1.8) 3.8 (2.0) 3.7 (1.7) 3.7 (1.9)
## 154.7 (415.7) 123.5 (240.3) 99.7 (75.2) 103.0 (85.1) 113.1 (162.6)
## 54 (76.1) 390 (68.2) 61 (70.9) 92 (71.3) 391 (63.8)
## 5 (7.0) 76 (13.3) 10 (11.6) 15 (11.6) 90 (14.7)
## 12 (16.9) 90 (15.7) 13 (15.1) 18 (14.0) 105 (17.1)
## 0 (0.0) 16 (2.8) 2 (2.3) 4 (3.1) 27 (4.4)
## 62 (87.3) 501 (87.6) 75 (87.2) 116 (89.9) 530 (86.5)
## 9 (12.7) 71 (12.4) 11 (12.8) 13 (10.1) 83 (13.5)
## 67 (94.4) 539 (96.8) 83 (97.6) 125 (97.7) 597 (98.5)
## 4 (5.6) 18 (3.2) 2 (2.4) 3 (2.3) 9 (1.5)
## 70 (98.6) 556 (97.2) 80 (93.0) 119 (92.2) 574 (93.6)
## 1 (1.4) 16 (2.8) 6 (7.0) 10 (7.8) 37 (6.0)
## 0 (0.0) 0 (0.0) 0 (0.0) 0 (0.0) 2 (0.3)
## 0.4 (0.1) 0.5 (0.2) 0.5 (0.2) 0.5 (0.1) 0.4 (0.2)
## 0.2 (0.1) 0.2 (0.1) 0.2 (0.1) 0.2 (0.1) 0.2 (0.1)
## 0.1 (0.1) 0.1 (0.1) 0.1 (0.1) 0.1 (0.1) 0.1 (0.1)
## 0.2 (0.1) 0.2 (0.1) 0.2 (0.1) 0.2 (0.1) 0.3 (0.1)
## (Missing)
## 433 (22.7)
## 302 (69.7)
## 131 (30.3)
## 35.9 (11.4)
## 330 (77.3)
## 97 (22.7)
## 334 (77.5)
## 97 (22.5)
## 3.6 (1.9)
## 111.6 (123.7)
## 273 (63.0)
## 76 (17.6)
## 64 (14.8)
## 20 (4.6)
## 377 (87.1)
## 56 (12.9)
## 416 (97.2)
## 12 (2.8)
## 0 (NaN)
## 0 (NaN)
## 0 (NaN)
## 0.4 (0.2)
## 0.2 (0.1)
## 0.1 (0.1)
## 0.3 (0.1)
library(finalfit)
library(readr)
#
tbdar_subset <- subset(tbdar, select=c("patient_sex", "age", "HIV_status", "TB_RF_smoking", "symptoms_duration_cough_duration", "socioeconomic_status", "Education", "general_examination_malnutrition", "Type", "common_tribes"))
# the variable smoking was not recorded in the controls,
explanatory = c("patient_sex", "age", "HIV_status", "TB_RF_smoking", "symptoms_duration_cough_duration", "socioeconomic_status","Education", "general_examination_malnutrition", "common_tribes")
dependent = "Type"
tbdar_subset %>%
summary_factorlist(dependent = dependent, explanatory, add_col_totals = T, add_row_totals = T, include_row_missing_col = T, p = T) -> t1
t1
## label Total N Missing N levels
## Total N (%)
## patient_sex 3101 (100.0) 0 male
## female
## age 3101 (100.0) 0 Mean (SD)
## HIV_status 3031 (97.7) 70 negative
## infected
## TB_RF_smoking 3090 (99.6) 11 no
## yes
## symptoms_duration_cough_duration 2590 (83.5) 511 Mean (SD)
## socioeconomic_status 3101 (100.0) 0 Mean (SD)
## Education 3101 (100.0) 0 primary
## no
## secondary
## university
## general_examination_malnutrition 3101 (100.0) 0 no
## yes
## common_tribes 3101 (100.0) 0 Chaga
## Makonde
## Matumbi
## Mwera
## Ndengereko
## Ngindo
## Ngoni
## Zaramo
## other
## control patient p
## 962 (31.0) 2139 (69.0)
## 610 (63.4) 1513 (70.7) <0.001
## 352 (36.6) 626 (29.3)
## 37.1 (10.7) 34.9 (10.8) <0.001
## 753 (81.4) 1690 (80.2) 0.488
## 172 (18.6) 416 (19.8)
## 626 (65.1) 1648 (77.4) <0.001
## 336 (34.9) 480 (22.6)
## 3.1 (1.6) 3.7 (1.8) <0.001
## 86.8 (132.5) 114.3 (182.7) <0.001
## 648 (67.4) 1420 (66.4) 0.009
## 167 (17.4) 303 (14.2)
## 123 (12.8) 335 (15.7)
## 24 (2.5) 81 (3.8)
## 934 (97.1) 1862 (87.1) <0.001
## 28 (2.9) 277 (12.9)
## 27 (2.8) 93 (4.3) 0.003
## 58 (6.0) 170 (7.9)
## 33 (3.4) 84 (3.9)
## 44 (4.6) 122 (5.7)
## 174 (18.1) 323 (15.1)
## 34 (3.5) 70 (3.3)
## 36 (3.7) 67 (3.1)
## 149 (15.5) 247 (11.5)
## 407 (42.3) 963 (45.0)
write_delim( t1,"/scicore/home/gagneux/zwymic00/Sinergia/Analysis/Human/Ancestry/Data/Outputs/Files/supplementary_table_controls.txt", delim = "\t")
library(compositions)
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
##
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
##
## anova, cor, cov, dist, var
## The following object is masked from 'package:graphics':
##
## segments
## The following objects are masked from 'package:base':
##
## %*%, norm, scale, scale.default
# use the additive log ratio transformation to handle the ancestries since they are compositional variables (sum up to 1)
alr_data <- alr(tbdar[,c("Western_Bantu", "Southeastern_Bantu", "Eastern_Bantu","other")])
colnames(alr_data) <- c("Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr")
new_data <- cbind(tbdar, alr_data)
library(ggplot2)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
for (anc in c("Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr")){
hist(new_data[, anc], xlab = anc, main = "")
}
# make some categories
new_data$Western_alr_cat <- ifelse(new_data$Western_Bantu_alr >= -1, 4, ifelse(new_data$Western_Bantu_alr < -1 & new_data$Western_Bantu_alr > -2, 3, ifelse(new_data$Western_Bantu_alr <= -2 & new_data$Western_Bantu_alr > -6, 2, ifelse(new_data$Western_Bantu_alr <= -6, 1, NA))))
new_data$Eastern_alr_cat <- ifelse(new_data$Eastern_Bantu_alr >= 0, 3, ifelse(new_data$Eastern_Bantu_alr < 0 & new_data$Eastern_Bantu_alr > -0.5, 2, ifelse(new_data$Eastern_Bantu_alr <= -0.5 , 1, NA)))
new_data$Southeastern_alr_cat <- ifelse(new_data$Southeastern_Bantu_alr >= 1, 3, ifelse(new_data$Southeastern_Bantu_alr < 1 & new_data$Southeastern_Bantu_alr > 0, 2, ifelse(new_data$Southeastern_Bantu_alr <= 0 , 1, NA)))
for (anc in c("Western", "Southeastern", "Eastern")){
print(anc)
print(table(new_data[,paste0(anc, "_alr_cat")]))
}
## [1] "Western"
##
## 1 2 3 4
## 104 211 488 639
## [1] "Southeastern"
##
## 1 2 3
## 313 603 526
## [1] "Eastern"
##
## 1 2 3
## 187 646 609
# how is the relationship between the outcome variables and the socioeconomic status?
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Xray_01", "socioeconomic_status")])
test <- glm(Xray_01 ~ socioeconomic_status, data = clean, na.action = na.exclude, family = binomial)
plot(test) # linearity looks fine to me according to https://bookdown.org/egarpor/PM-UC3M/glm-diagnostics.html Figure 5.14
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("TB_score_2cat", "socioeconomic_status")])
test <- glm(TB_score_2cat ~ socioeconomic_status, data = clean, na.action = na.exclude, family = binomial)
plot(test)
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Ct_value", "socioeconomic_status")])
test <- glm(log10(Ct_value) ~ socioeconomic_status, data = clean, na.action = na.exclude, family = gaussian)
plot(test)
# make some test, to check, whether the assumptions of the model are fulfilled
# test the linearity on the log-odds scale
# do a Box-Tidwell test to see, whether the log transformed continuous variables have a significant interaction with themselves
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# X-ray score
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "Xray_01", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socioeconomic_status", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socioeconomic_status + socioeconomic_status:log(socioeconomic_status) + general_examination_malnutrition + Education + TB_category + DR_category, data = clean, family = binomial, na.action = na.exclude)
logodds <- test$linear.predictors
summary(test)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socioeconomic_status +
## socioeconomic_status:log(socioeconomic_status) + general_examination_malnutrition +
## Education + TB_category + DR_category, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -1.714074 0.896771 -1.911
## as.factor(Western_alr_cat)2 -0.303688 0.534375 -0.568
## as.factor(Western_alr_cat)3 -0.648030 0.489604 -1.324
## as.factor(Western_alr_cat)4 -0.606521 0.477027 -1.271
## as.factor(Eastern_alr_cat)2 0.386321 0.489810 0.789
## as.factor(Eastern_alr_cat)3 0.226144 0.503048 0.450
## as.factor(Southeastern_alr_cat)2 0.867088 0.392235 2.211
## as.factor(Southeastern_alr_cat)3 0.848758 0.418560 2.028
## Intro101 0.492607 0.265737 1.854
## as.factor(age_cat)2 -0.045123 0.324959 -0.139
## as.factor(age_cat)3 -0.010947 0.382693 -0.029
## as.factor(age_cat)4 -0.030682 0.562190 -0.055
## patient_sexfemale -0.183113 0.313580 -0.584
## TB_RF_smokingyes -0.414714 0.346492 -1.197
## as.factor(cough_cat)2 -0.662846 0.413675 -1.602
## as.factor(cough_cat)3 -0.040582 0.296698 -0.137
## socioeconomic_status -0.018358 0.021685 -0.847
## general_examination_malnutritionyes 0.387367 0.368012 1.053
## Educationno 0.056587 0.403341 0.140
## Educationsecondary 0.161346 0.368946 0.437
## Educationuniversity -0.258894 0.664273 -0.390
## TB_categoryrelapse -0.075975 1.128605 -0.067
## DR_categoryresistant -0.492706 0.788882 -0.625
## socioeconomic_status:log(socioeconomic_status) 0.003068 0.003516 0.873
## Pr(>|z|)
## (Intercept) 0.0560 .
## as.factor(Western_alr_cat)2 0.5698
## as.factor(Western_alr_cat)3 0.1856
## as.factor(Western_alr_cat)4 0.2036
## as.factor(Eastern_alr_cat)2 0.4303
## as.factor(Eastern_alr_cat)3 0.6530
## as.factor(Southeastern_alr_cat)2 0.0271 *
## as.factor(Southeastern_alr_cat)3 0.0426 *
## Intro101 0.0638 .
## as.factor(age_cat)2 0.8896
## as.factor(age_cat)3 0.9772
## as.factor(age_cat)4 0.9565
## patient_sexfemale 0.5593
## TB_RF_smokingyes 0.2313
## as.factor(cough_cat)2 0.1091
## as.factor(cough_cat)3 0.8912
## socioeconomic_status 0.3972
## general_examination_malnutritionyes 0.2925
## Educationno 0.8884
## Educationsecondary 0.6619
## Educationuniversity 0.6967
## TB_categoryrelapse 0.9463
## DR_categoryresistant 0.5323
## socioeconomic_status:log(socioeconomic_status) 0.3829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 400.92 on 452 degrees of freedom
## AIC: 448.92
##
## Number of Fisher Scoring iterations: 5
# TB-score
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "TB_score_2cat", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socioeconomic_status", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socioeconomic_status + socioeconomic_status:log(socioeconomic_status) + general_examination_malnutrition + Education + TB_category + DR_category, data = clean, family = binomial, na.action = na.exclude)
logodds <- test$linear.predictors
summary(test)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socioeconomic_status +
## socioeconomic_status:log(socioeconomic_status) + general_examination_malnutrition +
## Education + TB_category + DR_category, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 0.9064289 0.4756788 1.906
## as.factor(Western_alr_cat)2 0.0069756 0.3627947 0.019
## as.factor(Western_alr_cat)3 0.0988675 0.3235889 0.306
## as.factor(Western_alr_cat)4 -0.0457328 0.3210485 -0.142
## as.factor(Eastern_alr_cat)2 0.1543690 0.2614055 0.591
## as.factor(Eastern_alr_cat)3 0.0582153 0.2699886 0.216
## as.factor(Southeastern_alr_cat)2 -0.4788609 0.2183675 -2.193
## as.factor(Southeastern_alr_cat)3 0.0324061 0.2391166 0.136
## Intro101 0.0247476 0.1687611 0.147
## as.factor(age_cat)2 0.2807053 0.1976608 1.420
## as.factor(age_cat)3 -0.0262999 0.2394222 -0.110
## as.factor(age_cat)4 0.3161308 0.3435733 0.920
## patient_sexfemale -0.2367093 0.1944373 -1.217
## TB_RF_smokingyes -0.1470433 0.2084468 -0.705
## as.factor(cough_cat)2 -0.1339090 0.2477538 -0.540
## as.factor(cough_cat)3 -0.2093950 0.1976269 -1.060
## socioeconomic_status 0.0091886 0.0040823 2.251
## general_examination_malnutritionyes -1.8113481 0.2512825 -7.208
## Educationno -0.1041520 0.2527282 -0.412
## Educationsecondary -0.3196979 0.2227096 -1.435
## Educationuniversity 0.1134083 0.4177544 0.271
## TB_categoryrelapse -0.6033336 0.6543214 -0.922
## DR_categoryresistant 0.4837574 0.4373979 1.106
## socioeconomic_status:log(socioeconomic_status) -0.0011201 0.0005015 -2.234
## Pr(>|z|)
## (Intercept) 0.0567 .
## as.factor(Western_alr_cat)2 0.9847
## as.factor(Western_alr_cat)3 0.7600
## as.factor(Western_alr_cat)4 0.8867
## as.factor(Eastern_alr_cat)2 0.5548
## as.factor(Eastern_alr_cat)3 0.8293
## as.factor(Southeastern_alr_cat)2 0.0283 *
## as.factor(Southeastern_alr_cat)3 0.8922
## Intro101 0.8834
## as.factor(age_cat)2 0.1556
## as.factor(age_cat)3 0.9125
## as.factor(age_cat)4 0.3575
## patient_sexfemale 0.2234
## TB_RF_smokingyes 0.4805
## as.factor(cough_cat)2 0.5889
## as.factor(cough_cat)3 0.2894
## socioeconomic_status 0.0244 *
## general_examination_malnutritionyes 5.66e-13 ***
## Educationno 0.6803
## Educationsecondary 0.1511
## Educationuniversity 0.7860
## TB_categoryrelapse 0.3565
## DR_categoryresistant 0.2687
## socioeconomic_status:log(socioeconomic_status) 0.0255 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 936.16 on 803 degrees of freedom
## AIC: 984.16
##
## Number of Fisher Scoring iterations: 4
# Ct-value - look at the residual plots
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "Ct_value", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socioeconomic_status", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socioeconomic_status + general_examination_malnutrition + Education + TB_category + DR_category, data = clean, family = "gaussian", na.action = na.exclude)
plot(test)
–> for TB-score, there is linearity with the socioeconomic status. Thus let us categorize the variable –> added to the beginning already
–> seems fine, all VIF values are low…
library(car)
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "Xray_01", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socio_cat", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + Education + TB_category + DR_category, data = clean, family = binomial, na.action = na.exclude)
vif(test)
## GVIF Df GVIF^(1/(2*Df))
## as.factor(Western_alr_cat) 1.263581 3 1.039762
## as.factor(Eastern_alr_cat) 1.219243 2 1.050806
## as.factor(Southeastern_alr_cat) 1.287133 2 1.065138
## Intro10 1.071373 1 1.035071
## as.factor(age_cat) 1.400526 3 1.057747
## patient_sex 1.224722 1 1.106672
## TB_RF_smoking 1.252161 1 1.119000
## as.factor(cough_cat) 1.152978 2 1.036228
## socio_cat 1.270735 2 1.061729
## general_examination_malnutrition 1.149464 1 1.072131
## Education 1.446129 3 1.063411
## TB_category 1.036860 1 1.018263
## DR_category 1.061153 1 1.030123
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "TB_score_2cat", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socio_cat", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + Education + TB_category+ DR_category, data = clean, family = binomial, na.action = na.exclude)
vif(test)
## GVIF Df GVIF^(1/(2*Df))
## as.factor(Western_alr_cat) 1.197967 3 1.030562
## as.factor(Eastern_alr_cat) 1.219483 2 1.050858
## as.factor(Southeastern_alr_cat) 1.270586 2 1.061698
## Intro10 1.040253 1 1.019928
## as.factor(age_cat) 1.288336 3 1.043129
## patient_sex 1.229874 1 1.108997
## TB_RF_smoking 1.310898 1 1.144945
## as.factor(cough_cat) 1.054779 2 1.013422
## socio_cat 1.180576 2 1.042374
## general_examination_malnutrition 1.080151 1 1.039303
## Education 1.317486 3 1.047026
## TB_category 1.043649 1 1.021591
## DR_category 1.024508 1 1.012180
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "TB_score_2cat", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socio_cat", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
test <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + Education + TB_category+ DR_category, data = clean, family = binomial, na.action = na.exclude)
vif(test)
## GVIF Df GVIF^(1/(2*Df))
## as.factor(Western_alr_cat) 1.197967 3 1.030562
## as.factor(Eastern_alr_cat) 1.219483 2 1.050858
## as.factor(Southeastern_alr_cat) 1.270586 2 1.061698
## Intro10 1.040253 1 1.019928
## as.factor(age_cat) 1.288336 3 1.043129
## patient_sex 1.229874 1 1.108997
## TB_RF_smoking 1.310898 1 1.144945
## as.factor(cough_cat) 1.054779 2 1.013422
## socio_cat 1.180576 2 1.042374
## general_examination_malnutrition 1.080151 1 1.039303
## Education 1.317486 3 1.047026
## TB_category 1.043649 1 1.021591
## DR_category 1.024508 1 1.012180
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "Xray_01", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socio_cat", "general_examination_malnutrition", "TB_category", "Education", "DR_category", "DR_status")])
dim(clean)
## [1] 476 19
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) + as.factor(Eastern_alr_cat), data = clean, family = binomial, na.action = na.exclude)
print(summary(model1))
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) +
## as.factor(Eastern_alr_cat), family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1597 0.6315 -3.420 0.000627 ***
## as.factor(Western_alr_cat)2 -0.3371 0.5168 -0.652 0.514234
## as.factor(Western_alr_cat)3 -0.6551 0.4774 -1.372 0.169960
## as.factor(Western_alr_cat)4 -0.6004 0.4584 -1.310 0.190308
## as.factor(Southeastern_alr_cat)2 0.8398 0.3842 2.186 0.028815 *
## as.factor(Southeastern_alr_cat)3 0.8083 0.3990 2.026 0.042768 *
## as.factor(Eastern_alr_cat)2 0.4603 0.4780 0.963 0.335524
## as.factor(Eastern_alr_cat)3 0.3687 0.4877 0.756 0.449669
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 410.73 on 468 degrees of freedom
## AIC: 426.73
##
## Number of Fisher Scoring iterations: 5
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 0.12 0.03 0.37
## as.factor(Western_alr_cat)2 0.71 0.26 2.03
## as.factor(Western_alr_cat)3 0.52 0.21 1.38
## as.factor(Western_alr_cat)4 0.55 0.23 1.42
## as.factor(Southeastern_alr_cat)2 2.32 1.13 5.16
## as.factor(Southeastern_alr_cat)3 2.24 1.06 5.13
## as.factor(Eastern_alr_cat)2 1.58 0.66 4.43
## as.factor(Eastern_alr_cat)3 1.45 0.59 4.11
model2 <- glm(Xray_01 ~ 1, data = clean, family = binomial, na.action = na.exclude)
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) +
## as.factor(Eastern_alr_cat)
## Model 2: Xray_01 ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -205.36
## 2 1 -210.67 -7 10.615 0.1563
–> looking at the summary I would say that Western_Bantu and Southeastern_Bantu are not linear, thus use them as categories
# add the introduction
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3495 0.6471 -3.631 0.000283 ***
## as.factor(Western_alr_cat)2 -0.3274 0.5192 -0.631 0.528358
## as.factor(Western_alr_cat)3 -0.6715 0.4799 -1.399 0.161704
## as.factor(Western_alr_cat)4 -0.5811 0.4601 -1.263 0.206600
## as.factor(Eastern_alr_cat)2 0.4926 0.4800 1.026 0.304788
## as.factor(Eastern_alr_cat)3 0.3711 0.4889 0.759 0.447828
## as.factor(Southeastern_alr_cat)2 0.8400 0.3843 2.186 0.028841 *
## as.factor(Southeastern_alr_cat)3 0.8137 0.3995 2.037 0.041658 *
## Intro101 0.4231 0.2569 1.647 0.099508 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 408.05 on 467 degrees of freedom
## AIC: 426.05
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + HIV_status + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat), data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat),
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.25815 0.66576 -3.392 0.000694 ***
## as.factor(Western_alr_cat)2 -0.31745 0.52249 -0.608 0.543467
## as.factor(Western_alr_cat)3 -0.66766 0.48129 -1.387 0.165369
## as.factor(Western_alr_cat)4 -0.56235 0.46361 -1.213 0.225141
## as.factor(Eastern_alr_cat)2 0.48207 0.48127 1.002 0.316504
## as.factor(Eastern_alr_cat)3 0.34687 0.49170 0.705 0.480524
## as.factor(Southeastern_alr_cat)2 0.84954 0.38488 2.207 0.027292 *
## as.factor(Southeastern_alr_cat)3 0.80930 0.40150 2.016 0.043832 *
## Intro101 0.41722 0.25754 1.620 0.105221
## as.factor(age_cat)2 -0.19231 0.29821 -0.645 0.519003
## as.factor(age_cat)3 -0.08697 0.35721 -0.243 0.807645
## as.factor(age_cat)4 -0.11508 0.53765 -0.214 0.830513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 407.62 on 464 degrees of freedom
## AIC: 431.62
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + HIV_status + symptoms_duration_cough_duration
# do it one by one
# sex
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.23596 0.67979 -3.289 0.0010 **
## as.factor(Western_alr_cat)2 -0.32661 0.52572 -0.621 0.5344
## as.factor(Western_alr_cat)3 -0.67211 0.48221 -1.394 0.1634
## as.factor(Western_alr_cat)4 -0.56846 0.46529 -1.222 0.2218
## as.factor(Eastern_alr_cat)2 0.48185 0.48130 1.001 0.3168
## as.factor(Eastern_alr_cat)3 0.34952 0.49197 0.710 0.4774
## as.factor(Southeastern_alr_cat)2 0.84914 0.38479 2.207 0.0273 *
## as.factor(Southeastern_alr_cat)3 0.80816 0.40145 2.013 0.0441 *
## Intro101 0.41424 0.25820 1.604 0.1086
## as.factor(age_cat)2 -0.19525 0.29878 -0.653 0.5134
## as.factor(age_cat)3 -0.09040 0.35780 -0.253 0.8005
## as.factor(age_cat)4 -0.12676 0.54237 -0.234 0.8152
## patient_sexfemale -0.04622 0.28508 -0.162 0.8712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 407.60 on 463 degrees of freedom
## AIC: 433.6
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration
# do it one by one
# smoking
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.15884 0.68463 -3.153 0.00161 **
## as.factor(Western_alr_cat)2 -0.34258 0.52728 -0.650 0.51588
## as.factor(Western_alr_cat)3 -0.66261 0.48295 -1.372 0.17007
## as.factor(Western_alr_cat)4 -0.58627 0.46609 -1.258 0.20844
## as.factor(Eastern_alr_cat)2 0.48106 0.48143 0.999 0.31767
## as.factor(Eastern_alr_cat)3 0.35184 0.49195 0.715 0.47449
## as.factor(Southeastern_alr_cat)2 0.83923 0.38548 2.177 0.02947 *
## as.factor(Southeastern_alr_cat)3 0.80555 0.40193 2.004 0.04505 *
## Intro101 0.42846 0.25881 1.655 0.09783 .
## as.factor(age_cat)2 -0.15738 0.30167 -0.522 0.60188
## as.factor(age_cat)3 -0.02029 0.36562 -0.055 0.95575
## as.factor(age_cat)4 -0.08633 0.54464 -0.159 0.87405
## patient_sexfemale -0.14149 0.30145 -0.469 0.63880
## TB_RF_smokingyes -0.30645 0.33576 -0.913 0.36139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 406.74 on 462 degrees of freedom
## AIC: 434.74
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration
# do it one by one
# cough duration
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat), data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat), family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.98630 0.72744 -2.731 0.00632 **
## as.factor(Western_alr_cat)2 -0.35513 0.53145 -0.668 0.50399
## as.factor(Western_alr_cat)3 -0.66795 0.48668 -1.372 0.16992
## as.factor(Western_alr_cat)4 -0.61888 0.47083 -1.314 0.18869
## as.factor(Eastern_alr_cat)2 0.44071 0.48221 0.914 0.36074
## as.factor(Eastern_alr_cat)3 0.30616 0.49395 0.620 0.53538
## as.factor(Southeastern_alr_cat)2 0.82196 0.38744 2.122 0.03388 *
## as.factor(Southeastern_alr_cat)3 0.81739 0.40339 2.026 0.04274 *
## Intro101 0.46435 0.26236 1.770 0.07675 .
## as.factor(age_cat)2 -0.12753 0.30354 -0.420 0.67438
## as.factor(age_cat)3 -0.04063 0.36800 -0.110 0.91208
## as.factor(age_cat)4 -0.02895 0.54869 -0.053 0.95792
## patient_sexfemale -0.17076 0.30402 -0.562 0.57434
## TB_RF_smokingyes -0.35708 0.33893 -1.054 0.29210
## as.factor(cough_cat)2 -0.62899 0.40754 -1.543 0.12274
## as.factor(cough_cat)3 -0.02462 0.28354 -0.087 0.93080
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 403.69 on 460 degrees of freedom
## AIC: 435.69
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat), data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat),
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8953739 0.7500866 -2.527 0.0115 *
## as.factor(Western_alr_cat)2 -0.3798857 0.5332104 -0.712 0.4762
## as.factor(Western_alr_cat)3 -0.6625474 0.4874575 -1.359 0.1741
## as.factor(Western_alr_cat)4 -0.6029257 0.4719330 -1.278 0.2014
## as.factor(Eastern_alr_cat)2 0.4266041 0.4826585 0.884 0.3768
## as.factor(Eastern_alr_cat)3 0.2887815 0.4939081 0.585 0.5588
## as.factor(Southeastern_alr_cat)2 0.8212903 0.3905007 2.103 0.0355 *
## as.factor(Southeastern_alr_cat)3 0.7903390 0.4101245 1.927 0.0540 .
## Intro101 0.4759988 0.2631291 1.809 0.0705 .
## as.factor(age_cat)2 -0.1457081 0.3046314 -0.478 0.6324
## as.factor(age_cat)3 -0.0545067 0.3686680 -0.148 0.8825
## as.factor(age_cat)4 -0.0655988 0.5519047 -0.119 0.9054
## patient_sexfemale -0.1905175 0.3091752 -0.616 0.5378
## TB_RF_smokingyes -0.3585102 0.3398542 -1.055 0.2915
## as.factor(cough_cat)2 -0.6278634 0.4082305 -1.538 0.1240
## as.factor(cough_cat)3 -0.0113128 0.2858056 -0.040 0.9684
## as.factor(socio_cat)2 -0.2295867 0.3040497 -0.755 0.4502
## as.factor(socio_cat)3 -0.0002465 0.4251929 -0.001 0.9995
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 403.07 on 458 degrees of freedom
## AIC: 439.07
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) + general_examination_malnutrition, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) +
## general_examination_malnutrition, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.922419 0.750680 -2.561 0.0104 *
## as.factor(Western_alr_cat)2 -0.368650 0.533227 -0.691 0.4893
## as.factor(Western_alr_cat)3 -0.632031 0.487479 -1.297 0.1948
## as.factor(Western_alr_cat)4 -0.574286 0.471553 -1.218 0.2233
## as.factor(Eastern_alr_cat)2 0.412149 0.483791 0.852 0.3943
## as.factor(Eastern_alr_cat)3 0.260222 0.495861 0.525 0.5997
## as.factor(Southeastern_alr_cat)2 0.825505 0.390612 2.113 0.0346 *
## as.factor(Southeastern_alr_cat)3 0.798272 0.410874 1.943 0.0520 .
## Intro101 0.506851 0.265083 1.912 0.0559 .
## as.factor(age_cat)2 -0.123658 0.304764 -0.406 0.6849
## as.factor(age_cat)3 -0.066283 0.369894 -0.179 0.8578
## as.factor(age_cat)4 -0.079430 0.551961 -0.144 0.8856
## patient_sexfemale -0.189293 0.309471 -0.612 0.5408
## TB_RF_smokingyes -0.412019 0.343685 -1.199 0.2306
## as.factor(cough_cat)2 -0.658729 0.409745 -1.608 0.1079
## as.factor(cough_cat)3 -0.077489 0.292086 -0.265 0.7908
## as.factor(socio_cat)2 -0.273726 0.307454 -0.890 0.3733
## as.factor(socio_cat)3 -0.007342 0.426167 -0.017 0.9863
## general_examination_malnutritionyes 0.424484 0.365998 1.160 0.2461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 401.78 on 457 degrees of freedom
## AIC: 439.78
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) + general_examination_malnutrition + TB_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) +
## general_examination_malnutrition + TB_category, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.923386 0.751386 -2.560 0.0105 *
## as.factor(Western_alr_cat)2 -0.368560 0.533237 -0.691 0.4895
## as.factor(Western_alr_cat)3 -0.631399 0.487935 -1.294 0.1957
## as.factor(Western_alr_cat)4 -0.573614 0.472085 -1.215 0.2243
## as.factor(Eastern_alr_cat)2 0.412768 0.484224 0.852 0.3940
## as.factor(Eastern_alr_cat)3 0.260364 0.495877 0.525 0.5995
## as.factor(Southeastern_alr_cat)2 0.825633 0.390628 2.114 0.0345 *
## as.factor(Southeastern_alr_cat)3 0.799103 0.411790 1.941 0.0523 .
## Intro101 0.506798 0.265090 1.912 0.0559 .
## as.factor(age_cat)2 -0.123199 0.305144 -0.404 0.6864
## as.factor(age_cat)3 -0.065940 0.370063 -0.178 0.8586
## as.factor(age_cat)4 -0.078477 0.552888 -0.142 0.8871
## patient_sexfemale -0.189591 0.309626 -0.612 0.5403
## TB_RF_smokingyes -0.411991 0.343699 -1.199 0.2306
## as.factor(cough_cat)2 -0.659180 0.410019 -1.608 0.1079
## as.factor(cough_cat)3 -0.077258 0.292192 -0.264 0.7915
## as.factor(socio_cat)2 -0.273270 0.307818 -0.888 0.3747
## as.factor(socio_cat)3 -0.007818 0.426460 -0.018 0.9854
## general_examination_malnutritionyes 0.423805 0.366698 1.156 0.2478
## TB_categoryrelapse -0.033689 1.131928 -0.030 0.9763
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 401.78 on 456 degrees of freedom
## AIC: 441.78
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat)+ general_examination_malnutrition + TB_category + Education, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) +
## general_examination_malnutrition + TB_category + Education,
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.98536 0.75847 -2.618 0.00886 **
## as.factor(Western_alr_cat)2 -0.35939 0.53481 -0.672 0.50159
## as.factor(Western_alr_cat)3 -0.63664 0.48959 -1.300 0.19348
## as.factor(Western_alr_cat)4 -0.59621 0.47692 -1.250 0.21125
## as.factor(Eastern_alr_cat)2 0.40837 0.48580 0.841 0.40056
## as.factor(Eastern_alr_cat)3 0.25373 0.49876 0.509 0.61095
## as.factor(Southeastern_alr_cat)2 0.83689 0.39166 2.137 0.03262 *
## as.factor(Southeastern_alr_cat)3 0.82460 0.41900 1.968 0.04907 *
## Intro101 0.51887 0.26629 1.949 0.05135 .
## as.factor(age_cat)2 -0.06894 0.32435 -0.213 0.83169
## as.factor(age_cat)3 -0.02067 0.38218 -0.054 0.95687
## as.factor(age_cat)4 -0.03925 0.55884 -0.070 0.94400
## patient_sexfemale -0.19277 0.31010 -0.622 0.53418
## TB_RF_smokingyes -0.41777 0.34585 -1.208 0.22707
## as.factor(cough_cat)2 -0.66925 0.41139 -1.627 0.10378
## as.factor(cough_cat)3 -0.07732 0.29313 -0.264 0.79197
## as.factor(socio_cat)2 -0.27269 0.31126 -0.876 0.38098
## as.factor(socio_cat)3 0.01433 0.43520 0.033 0.97372
## general_examination_malnutritionyes 0.40919 0.37130 1.102 0.27044
## TB_categoryrelapse -0.03779 1.13271 -0.033 0.97339
## Educationno 0.03560 0.40310 0.088 0.92963
## Educationsecondary 0.19448 0.36959 0.526 0.59875
## Educationuniversity -0.20149 0.66662 -0.302 0.76245
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 401.35 on 453 degrees of freedom
## AIC: 447.35
##
## Number of Fisher Scoring iterations: 5
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat)+ general_examination_malnutrition + TB_category + Education + DR_status, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) +
## general_examination_malnutrition + TB_category + Education +
## DR_status, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.97543 0.75635 -2.612 0.00901 **
## as.factor(Western_alr_cat)2 -0.32864 0.53605 -0.613 0.53982
## as.factor(Western_alr_cat)3 -0.63461 0.48992 -1.295 0.19521
## as.factor(Western_alr_cat)4 -0.59465 0.47726 -1.246 0.21277
## as.factor(Eastern_alr_cat)2 0.37163 0.49035 0.758 0.44851
## as.factor(Eastern_alr_cat)3 0.22392 0.50438 0.444 0.65707
## as.factor(Southeastern_alr_cat)2 0.85958 0.39355 2.184 0.02895 *
## as.factor(Southeastern_alr_cat)3 0.83501 0.42045 1.986 0.04703 *
## Intro101 0.50999 0.26642 1.914 0.05559 .
## as.factor(age_cat)2 -0.04507 0.32519 -0.139 0.88976
## as.factor(age_cat)3 -0.01501 0.38267 -0.039 0.96872
## as.factor(age_cat)4 -0.02067 0.55982 -0.037 0.97055
## patient_sexfemale -0.17206 0.31086 -0.553 0.57992
## TB_RF_smokingyes -0.42329 0.34631 -1.222 0.22159
## as.factor(cough_cat)2 -0.66871 0.41171 -1.624 0.10433
## as.factor(cough_cat)3 -0.07704 0.29353 -0.262 0.79296
## as.factor(socio_cat)2 -0.26092 0.31222 -0.836 0.40332
## as.factor(socio_cat)3 0.02679 0.43567 0.061 0.95098
## general_examination_malnutritionyes 0.41065 0.37119 1.106 0.26860
## TB_categoryrelapse -0.04574 1.13199 -0.040 0.96777
## Educationno 0.04790 0.40472 0.118 0.90580
## Educationsecondary 0.18450 0.36956 0.499 0.61760
## Educationuniversity -0.20602 0.66670 -0.309 0.75731
## DR_statusINH-Mono -0.40393 0.79376 -0.509 0.61083
## DR_statusMDR -13.12837 882.74354 -0.015 0.98813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 400.66 on 451 degrees of freedom
## AIC: 450.66
##
## Number of Fisher Scoring iterations: 13
–> there are too little MDR cases, categorize the DR-status into susceptible and resistant –> adjusted in the beginning…
# add the covariates
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat)+ general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + as.factor(socio_cat) +
## general_examination_malnutrition + TB_category + Education +
## DR_category, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.97107 0.75633 -2.606 0.00916 **
## as.factor(Western_alr_cat)2 -0.33944 0.53612 -0.633 0.52664
## as.factor(Western_alr_cat)3 -0.63303 0.49004 -1.292 0.19643
## as.factor(Western_alr_cat)4 -0.59279 0.47735 -1.242 0.21429
## as.factor(Eastern_alr_cat)2 0.36694 0.49020 0.749 0.45412
## as.factor(Eastern_alr_cat)3 0.21082 0.50330 0.419 0.67530
## as.factor(Southeastern_alr_cat)2 0.85868 0.39374 2.181 0.02920 *
## as.factor(Southeastern_alr_cat)3 0.83781 0.42054 1.992 0.04634 *
## Intro101 0.51191 0.26650 1.921 0.05475 .
## as.factor(age_cat)2 -0.04971 0.32522 -0.153 0.87852
## as.factor(age_cat)3 -0.01532 0.38278 -0.040 0.96807
## as.factor(age_cat)4 -0.02374 0.55989 -0.042 0.96618
## patient_sexfemale -0.17864 0.31082 -0.575 0.56547
## TB_RF_smokingyes -0.42442 0.34646 -1.225 0.22057
## as.factor(cough_cat)2 -0.66239 0.41163 -1.609 0.10757
## as.factor(cough_cat)3 -0.07016 0.29332 -0.239 0.81097
## as.factor(socio_cat)2 -0.25823 0.31230 -0.827 0.40831
## as.factor(socio_cat)3 0.03100 0.43558 0.071 0.94327
## general_examination_malnutritionyes 0.41154 0.37131 1.108 0.26771
## TB_categoryrelapse -0.04890 1.13224 -0.043 0.96555
## Educationno 0.05384 0.40468 0.133 0.89416
## Educationsecondary 0.18487 0.36949 0.500 0.61684
## Educationuniversity -0.20094 0.66668 -0.301 0.76311
## DR_categoryresistant -0.47988 0.78984 -0.608 0.54348
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 400.94 on 452 degrees of freedom
## AIC: 448.94
##
## Number of Fisher Scoring iterations: 5
library(lmtest)
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
model2 <- glm(Xray_01 ~ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat+ TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.97107 0.75633 -2.606 0.00916 **
## as.factor(Western_alr_cat)2 -0.33944 0.53612 -0.633 0.52664
## as.factor(Western_alr_cat)3 -0.63303 0.49004 -1.292 0.19643
## as.factor(Western_alr_cat)4 -0.59279 0.47735 -1.242 0.21429
## as.factor(Eastern_alr_cat)2 0.36694 0.49020 0.749 0.45412
## as.factor(Eastern_alr_cat)3 0.21082 0.50330 0.419 0.67530
## as.factor(Southeastern_alr_cat)2 0.85868 0.39374 2.181 0.02920 *
## as.factor(Southeastern_alr_cat)3 0.83781 0.42054 1.992 0.04634 *
## Intro101 0.51191 0.26650 1.921 0.05475 .
## as.factor(age_cat)2 -0.04971 0.32522 -0.153 0.87852
## as.factor(age_cat)3 -0.01532 0.38278 -0.040 0.96807
## as.factor(age_cat)4 -0.02374 0.55989 -0.042 0.96618
## patient_sexfemale -0.17864 0.31082 -0.575 0.56547
## TB_RF_smokingyes -0.42442 0.34646 -1.225 0.22057
## as.factor(cough_cat)2 -0.66239 0.41163 -1.609 0.10757
## as.factor(cough_cat)3 -0.07016 0.29332 -0.239 0.81097
## socio_cat2 -0.25823 0.31230 -0.827 0.40831
## socio_cat3 0.03100 0.43558 0.071 0.94327
## TB_categoryrelapse -0.04890 1.13224 -0.043 0.96555
## general_examination_malnutritionyes 0.41154 0.37131 1.108 0.26771
## Educationno 0.05384 0.40468 0.133 0.89416
## Educationsecondary 0.18487 0.36949 0.500 0.61684
## Educationuniversity -0.20094 0.66668 -0.301 0.76311
## DR_categoryresistant -0.47988 0.78984 -0.608 0.54348
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 400.94 on 452 degrees of freedom
## AIC: 448.94
##
## Number of Fisher Scoring iterations: 5
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category
## Model 2: Xray_01 ~ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 24 -200.47
## 2 17 -205.34 -7 9.739 0.2039
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 0.14 0.03 0.58
## as.factor(Western_alr_cat)2 0.71 0.25 2.10
## as.factor(Western_alr_cat)3 0.53 0.21 1.45
## as.factor(Western_alr_cat)4 0.55 0.22 1.48
## as.factor(Eastern_alr_cat)2 1.44 0.58 4.11
## as.factor(Eastern_alr_cat)3 1.23 0.49 3.60
## as.factor(Southeastern_alr_cat)2 2.36 1.13 5.35
## as.factor(Southeastern_alr_cat)3 2.31 1.04 5.49
## Intro101 1.67 0.99 2.81
## as.factor(age_cat)2 0.95 0.50 1.80
## as.factor(age_cat)3 0.98 0.46 2.06
## as.factor(age_cat)4 0.98 0.30 2.76
## patient_sexfemale 0.84 0.45 1.53
## TB_RF_smokingyes 0.65 0.32 1.27
## as.factor(cough_cat)2 0.52 0.22 1.12
## as.factor(cough_cat)3 0.93 0.53 1.67
## socio_cat2 0.77 0.41 1.41
## socio_cat3 1.03 0.42 2.35
## TB_categoryrelapse 0.95 0.05 6.48
## general_examination_malnutritionyes 1.51 0.71 3.07
## Educationno 1.06 0.46 2.26
## Educationsecondary 1.20 0.57 2.45
## Educationuniversity 0.82 0.18 2.68
## DR_categoryresistant 0.62 0.09 2.40
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat)* Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat)* Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) * Intro10 +
## as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.00831 0.95702 -2.099 0.0359
## as.factor(Western_alr_cat)2 -0.46162 0.80266 -0.575 0.5652
## as.factor(Western_alr_cat)3 -0.14571 0.73497 -0.198 0.8428
## as.factor(Western_alr_cat)4 0.07573 0.70372 0.108 0.9143
## Intro101 0.13737 1.52345 0.090 0.9282
## as.factor(Eastern_alr_cat)2 -0.28909 0.57960 -0.499 0.6179
## as.factor(Eastern_alr_cat)3 -0.58538 0.60492 -0.968 0.3332
## as.factor(Southeastern_alr_cat)2 1.35118 0.54947 2.459 0.0139
## as.factor(Southeastern_alr_cat)3 0.83440 0.58829 1.418 0.1561
## as.factor(age_cat)2 0.03185 0.33431 0.095 0.9241
## as.factor(age_cat)3 0.03314 0.39646 0.084 0.9334
## as.factor(age_cat)4 0.15779 0.56537 0.279 0.7802
## patient_sexfemale -0.20237 0.31779 -0.637 0.5243
## TB_RF_smokingyes -0.51616 0.35843 -1.440 0.1498
## as.factor(cough_cat)2 -0.74782 0.42300 -1.768 0.0771
## as.factor(cough_cat)3 -0.12065 0.30102 -0.401 0.6886
## socio_cat2 -0.25048 0.31928 -0.785 0.4327
## socio_cat3 0.17153 0.44511 0.385 0.7000
## TB_categoryrelapse -0.05243 1.15851 -0.045 0.9639
## general_examination_malnutritionyes 0.41028 0.38360 1.070 0.2848
## Educationno 0.08301 0.42147 0.197 0.8439
## Educationsecondary 0.16923 0.37872 0.447 0.6550
## Educationuniversity -0.39236 0.68054 -0.577 0.5643
## DR_categoryresistant -0.42738 0.79761 -0.536 0.5921
## as.factor(Western_alr_cat)2:Intro101 0.08132 1.16012 0.070 0.9441
## as.factor(Western_alr_cat)3:Intro101 -1.00725 1.06956 -0.942 0.3463
## as.factor(Western_alr_cat)4:Intro101 -1.68837 1.03160 -1.637 0.1017
## Intro101:as.factor(Eastern_alr_cat)2 1.90069 1.24917 1.522 0.1281
## Intro101:as.factor(Eastern_alr_cat)3 2.07009 1.25592 1.648 0.0993
## Intro101:as.factor(Southeastern_alr_cat)2 -0.99077 0.81421 -1.217 0.2237
## Intro101:as.factor(Southeastern_alr_cat)3 0.06626 0.83183 0.080 0.9365
##
## (Intercept) *
## as.factor(Western_alr_cat)2
## as.factor(Western_alr_cat)3
## as.factor(Western_alr_cat)4
## Intro101
## as.factor(Eastern_alr_cat)2
## as.factor(Eastern_alr_cat)3
## as.factor(Southeastern_alr_cat)2 *
## as.factor(Southeastern_alr_cat)3
## as.factor(age_cat)2
## as.factor(age_cat)3
## as.factor(age_cat)4
## patient_sexfemale
## TB_RF_smokingyes
## as.factor(cough_cat)2 .
## as.factor(cough_cat)3
## socio_cat2
## socio_cat3
## TB_categoryrelapse
## general_examination_malnutritionyes
## Educationno
## Educationsecondary
## Educationuniversity
## DR_categoryresistant
## as.factor(Western_alr_cat)2:Intro101
## as.factor(Western_alr_cat)3:Intro101
## as.factor(Western_alr_cat)4:Intro101
## Intro101:as.factor(Eastern_alr_cat)2
## Intro101:as.factor(Eastern_alr_cat)3 .
## Intro101:as.factor(Southeastern_alr_cat)2
## Intro101:as.factor(Southeastern_alr_cat)3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 387.03 on 445 degrees of freedom
## AIC: 449.03
##
## Number of Fisher Scoring iterations: 5
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 0.13 0.02 0.79
## as.factor(Western_alr_cat)2 0.63 0.14 3.46
## as.factor(Western_alr_cat)3 0.86 0.22 4.32
## as.factor(Western_alr_cat)4 1.08 0.30 5.16
## Intro101 1.15 0.04 19.97
## as.factor(Eastern_alr_cat)2 0.75 0.25 2.54
## as.factor(Eastern_alr_cat)3 0.56 0.18 1.97
## as.factor(Southeastern_alr_cat)2 3.86 1.41 12.65
## as.factor(Southeastern_alr_cat)3 2.30 0.77 8.00
## as.factor(age_cat)2 1.03 0.53 1.99
## as.factor(age_cat)3 1.03 0.47 2.22
## as.factor(age_cat)4 1.17 0.35 3.35
## patient_sexfemale 0.82 0.43 1.51
## TB_RF_smokingyes 0.60 0.29 1.18
## as.factor(cough_cat)2 0.47 0.20 1.05
## as.factor(cough_cat)3 0.89 0.49 1.61
## socio_cat2 0.78 0.41 1.44
## socio_cat3 1.19 0.48 2.77
## TB_categoryrelapse 0.95 0.05 6.80
## general_examination_malnutritionyes 1.51 0.69 3.14
## Educationno 1.09 0.46 2.41
## Educationsecondary 1.18 0.55 2.46
## Educationuniversity 0.68 0.15 2.28
## DR_categoryresistant 0.65 0.10 2.59
## as.factor(Western_alr_cat)2:Intro101 1.08 0.10 10.38
## as.factor(Western_alr_cat)3:Intro101 0.37 0.04 2.87
## as.factor(Western_alr_cat)4:Intro101 0.18 0.02 1.34
## Intro101:as.factor(Eastern_alr_cat)2 6.69 0.75 152.26
## Intro101:as.factor(Eastern_alr_cat)3 7.93 0.87 181.77
## Intro101:as.factor(Southeastern_alr_cat)2 0.37 0.07 1.86
## Intro101:as.factor(Southeastern_alr_cat)3 1.07 0.20 5.56
model2 <- glm(Xray_01 ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
lrtest(model2, model1)
## Likelihood ratio test
##
## Model 1: Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category
## Model 2: Xray_01 ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 24 -200.47
## 2 31 -193.52 7 13.908 0.05284 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
–> not really, values change slightly…
model1 <- glm(Xray_01 ~ as.factor(Western_alr_cat)* Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat)* Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = Xray_01 ~ as.factor(Western_alr_cat) * Intro10 +
## as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking +
## socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.30239 0.92070 -2.501 0.0124
## as.factor(Western_alr_cat)2 -0.33045 0.79576 -0.415 0.6779
## as.factor(Western_alr_cat)3 -0.06567 0.73185 -0.090 0.9285
## as.factor(Western_alr_cat)4 0.17412 0.69961 0.249 0.8035
## Intro101 0.16748 1.52078 0.110 0.9123
## as.factor(Eastern_alr_cat)2 -0.22785 0.57661 -0.395 0.6927
## as.factor(Eastern_alr_cat)3 -0.55605 0.60249 -0.923 0.3561
## as.factor(Southeastern_alr_cat)2 1.36039 0.54846 2.480 0.0131
## as.factor(Southeastern_alr_cat)3 0.83597 0.58548 1.428 0.1533
## as.factor(age_cat)2 -0.01692 0.33111 -0.051 0.9593
## as.factor(age_cat)3 0.04379 0.39364 0.111 0.9114
## as.factor(age_cat)4 0.05777 0.55986 0.103 0.9178
## patient_sexfemale -0.17224 0.31528 -0.546 0.5849
## TB_RF_smokingyes -0.44580 0.35284 -1.263 0.2064
## socio_cat2 -0.26356 0.31907 -0.826 0.4088
## socio_cat3 0.20594 0.44109 0.467 0.6406
## TB_categoryrelapse 0.07240 1.15210 0.063 0.9499
## general_examination_malnutritionyes 0.42712 0.37255 1.146 0.2516
## Educationno 0.03588 0.41710 0.086 0.9314
## Educationsecondary 0.14028 0.37522 0.374 0.7085
## Educationuniversity -0.35290 0.67402 -0.524 0.6006
## DR_categoryresistant -0.46402 0.79838 -0.581 0.5611
## as.factor(Western_alr_cat)2:Intro101 -0.13897 1.14500 -0.121 0.9034
## as.factor(Western_alr_cat)3:Intro101 -1.11454 1.05718 -1.054 0.2918
## as.factor(Western_alr_cat)4:Intro101 -1.75688 1.02416 -1.715 0.0863
## Intro101:as.factor(Eastern_alr_cat)2 1.85394 1.25497 1.477 0.1396
## Intro101:as.factor(Eastern_alr_cat)3 2.11859 1.26283 1.678 0.0934
## Intro101:as.factor(Southeastern_alr_cat)2 -0.94476 0.81266 -1.163 0.2450
## Intro101:as.factor(Southeastern_alr_cat)3 0.05187 0.82712 0.063 0.9500
##
## (Intercept) *
## as.factor(Western_alr_cat)2
## as.factor(Western_alr_cat)3
## as.factor(Western_alr_cat)4
## Intro101
## as.factor(Eastern_alr_cat)2
## as.factor(Eastern_alr_cat)3
## as.factor(Southeastern_alr_cat)2 *
## as.factor(Southeastern_alr_cat)3
## as.factor(age_cat)2
## as.factor(age_cat)3
## as.factor(age_cat)4
## patient_sexfemale
## TB_RF_smokingyes
## socio_cat2
## socio_cat3
## TB_categoryrelapse
## general_examination_malnutritionyes
## Educationno
## Educationsecondary
## Educationuniversity
## DR_categoryresistant
## as.factor(Western_alr_cat)2:Intro101
## as.factor(Western_alr_cat)3:Intro101
## as.factor(Western_alr_cat)4:Intro101 .
## Intro101:as.factor(Eastern_alr_cat)2
## Intro101:as.factor(Eastern_alr_cat)3 .
## Intro101:as.factor(Southeastern_alr_cat)2
## Intro101:as.factor(Southeastern_alr_cat)3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 421.34 on 475 degrees of freedom
## Residual deviance: 390.63 on 447 degrees of freedom
## AIC: 448.63
##
## Number of Fisher Scoring iterations: 5
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 0.10 0.01 0.54
## as.factor(Western_alr_cat)2 0.72 0.16 3.90
## as.factor(Western_alr_cat)3 0.94 0.24 4.66
## as.factor(Western_alr_cat)4 1.19 0.34 5.66
## Intro101 1.18 0.04 20.48
## as.factor(Eastern_alr_cat)2 0.80 0.27 2.69
## as.factor(Eastern_alr_cat)3 0.57 0.18 2.02
## as.factor(Southeastern_alr_cat)2 3.90 1.43 12.75
## as.factor(Southeastern_alr_cat)3 2.31 0.77 7.98
## as.factor(age_cat)2 0.98 0.51 1.88
## as.factor(age_cat)3 1.04 0.47 2.23
## as.factor(age_cat)4 1.06 0.32 2.99
## patient_sexfemale 0.84 0.45 1.55
## TB_RF_smokingyes 0.64 0.31 1.26
## socio_cat2 0.77 0.40 1.42
## socio_cat3 1.23 0.50 2.84
## TB_categoryrelapse 1.08 0.05 7.61
## general_examination_malnutritionyes 1.53 0.72 3.12
## Educationno 1.04 0.44 2.28
## Educationsecondary 1.15 0.54 2.37
## Educationuniversity 0.70 0.15 2.33
## DR_categoryresistant 0.63 0.09 2.49
## as.factor(Western_alr_cat)2:Intro101 0.87 0.09 8.06
## as.factor(Western_alr_cat)3:Intro101 0.33 0.04 2.51
## as.factor(Western_alr_cat)4:Intro101 0.17 0.02 1.23
## Intro101:as.factor(Eastern_alr_cat)2 6.38 0.71 146.86
## Intro101:as.factor(Eastern_alr_cat)3 8.32 0.91 193.14
## Intro101:as.factor(Southeastern_alr_cat)2 0.39 0.08 1.94
## Intro101:as.factor(Southeastern_alr_cat)3 1.05 0.20 5.43
model2 <- glm(Xray_01 ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition + Education+ DR_category, data = clean, family = binomial, na.action = na.exclude)
lrtest(model2, model1)
## Likelihood ratio test
##
## Model 1: Xray_01 ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## Model 2: Xray_01 ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 22 -202.03
## 2 29 -195.31 7 13.427 0.06237 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "TB_score_2cat", "Western_Bantu_alr", "Southeastern_Bantu_alr", "Eastern_Bantu_alr", "cough_cat", "socio_cat", "TB_category", "general_examination_malnutrition", "Education", "DR_category")])
dim(clean)
## [1] 827 18
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) + as.factor(Eastern_alr_cat), data = clean, family = binomial, na.action = na.exclude)
print(summary(model1))
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) +
## as.factor(Eastern_alr_cat), family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.70490 0.33236 2.121 0.0339 *
## as.factor(Western_alr_cat)2 0.20267 0.33418 0.606 0.5442
## as.factor(Western_alr_cat)3 0.28739 0.29728 0.967 0.3337
## as.factor(Western_alr_cat)4 0.21666 0.29239 0.741 0.4587
## as.factor(Southeastern_alr_cat)2 -0.34864 0.20122 -1.733 0.0832 .
## as.factor(Southeastern_alr_cat)3 0.09367 0.21729 0.431 0.6664
## as.factor(Eastern_alr_cat)2 0.05064 0.24584 0.206 0.8368
## as.factor(Eastern_alr_cat)3 -0.09498 0.25347 -0.375 0.7079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1018.8 on 819 degrees of freedom
## AIC: 1034.8
##
## Number of Fisher Scoring iterations: 4
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 2.02 1.07 3.94
## as.factor(Western_alr_cat)2 1.22 0.63 2.35
## as.factor(Western_alr_cat)3 1.33 0.74 2.37
## as.factor(Western_alr_cat)4 1.24 0.69 2.18
## as.factor(Southeastern_alr_cat)2 0.71 0.47 1.04
## as.factor(Southeastern_alr_cat)3 1.10 0.72 1.68
## as.factor(Eastern_alr_cat)2 1.05 0.64 1.69
## as.factor(Eastern_alr_cat)3 0.91 0.55 1.49
model2 <- glm(TB_score_2cat ~ 1, data = clean, family = binomial, na.action = na.exclude)
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Southeastern_alr_cat) +
## as.factor(Eastern_alr_cat)
## Model 2: TB_score_2cat ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -509.39
## 2 1 -513.29 -7 7.8013 0.3504
# add the introduction
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.68930 0.33714 2.045 0.0409 *
## as.factor(Western_alr_cat)2 0.20506 0.33431 0.613 0.5396
## as.factor(Western_alr_cat)3 0.28703 0.29725 0.966 0.3342
## as.factor(Western_alr_cat)4 0.21841 0.29245 0.747 0.4552
## as.factor(Eastern_alr_cat)2 0.05215 0.24587 0.212 0.8320
## as.factor(Eastern_alr_cat)3 -0.09711 0.25357 -0.383 0.7018
## as.factor(Southeastern_alr_cat)2 -0.35010 0.20130 -1.739 0.0820 .
## as.factor(Southeastern_alr_cat)3 0.09287 0.21731 0.427 0.6691
## Intro101 0.04348 0.15768 0.276 0.7827
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1018.7 on 818 degrees of freedom
## AIC: 1036.7
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# + age + patient_sex + TB_RF_smoking + HIV_status + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# age
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat), data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat),
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.52630 0.34735 1.515 0.1297
## as.factor(Western_alr_cat)2 0.24284 0.33587 0.723 0.4697
## as.factor(Western_alr_cat)3 0.30920 0.29822 1.037 0.2998
## as.factor(Western_alr_cat)4 0.23192 0.29411 0.789 0.4304
## as.factor(Eastern_alr_cat)2 0.04839 0.24681 0.196 0.8446
## as.factor(Eastern_alr_cat)3 -0.09932 0.25479 -0.390 0.6967
## as.factor(Southeastern_alr_cat)2 -0.37162 0.20240 -1.836 0.0664 .
## as.factor(Southeastern_alr_cat)3 0.09299 0.21854 0.426 0.6705
## Intro101 0.05247 0.15853 0.331 0.7407
## as.factor(age_cat)2 0.37085 0.17707 2.094 0.0362 *
## as.factor(age_cat)3 0.02118 0.21399 0.099 0.9211
## as.factor(age_cat)4 0.37656 0.31716 1.187 0.2351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1013.1 on 815 degrees of freedom
## AIC: 1037.1
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# + age + patient_sex + TB_RF_smoking + HIV_status + symptoms_duration_cough_duration
# do it one by one
# sex
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.63772 0.35773 1.783 0.0746 .
## as.factor(Western_alr_cat)2 0.22245 0.33652 0.661 0.5086
## as.factor(Western_alr_cat)3 0.30203 0.29870 1.011 0.3120
## as.factor(Western_alr_cat)4 0.21834 0.29463 0.741 0.4587
## as.factor(Eastern_alr_cat)2 0.02713 0.24754 0.110 0.9127
## as.factor(Eastern_alr_cat)3 -0.10608 0.25506 -0.416 0.6775
## as.factor(Southeastern_alr_cat)2 -0.37089 0.20269 -1.830 0.0673 .
## as.factor(Southeastern_alr_cat)3 0.09722 0.21879 0.444 0.6568
## Intro101 0.03381 0.15927 0.212 0.8319
## as.factor(age_cat)2 0.34720 0.17812 1.949 0.0513 .
## as.factor(age_cat)3 -0.01388 0.21577 -0.064 0.9487
## as.factor(age_cat)4 0.32375 0.31982 1.012 0.3114
## patient_sexfemale -0.23097 0.17026 -1.357 0.1749
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1011.3 on 814 degrees of freedom
## AIC: 1037.3
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration
# do it one by one
# smoking
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.69694 0.36071 1.932 0.0533 .
## as.factor(Western_alr_cat)2 0.20345 0.33728 0.603 0.5464
## as.factor(Western_alr_cat)3 0.30593 0.29911 1.023 0.3064
## as.factor(Western_alr_cat)4 0.20414 0.29528 0.691 0.4894
## as.factor(Eastern_alr_cat)2 0.04765 0.24835 0.192 0.8478
## as.factor(Eastern_alr_cat)3 -0.09191 0.25552 -0.360 0.7191
## as.factor(Southeastern_alr_cat)2 -0.36857 0.20295 -1.816 0.0694 .
## as.factor(Southeastern_alr_cat)3 0.09789 0.21895 0.447 0.6548
## Intro101 0.04199 0.15957 0.263 0.7924
## as.factor(age_cat)2 0.37346 0.17943 2.081 0.0374 *
## as.factor(age_cat)3 0.05038 0.22112 0.228 0.8198
## as.factor(age_cat)4 0.37767 0.32263 1.171 0.2418
## patient_sexfemale -0.31936 0.18241 -1.751 0.0800 .
## TB_RF_smokingyes -0.27066 0.19257 -1.406 0.1599
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1009.4 on 813 degrees of freedom
## AIC: 1037.4
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration
# do it one by one
# cough duration
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat), data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat), family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.93537 0.39941 2.342 0.0192 *
## as.factor(Western_alr_cat)2 0.17151 0.33866 0.506 0.6126
## as.factor(Western_alr_cat)3 0.29047 0.30015 0.968 0.3332
## as.factor(Western_alr_cat)4 0.17752 0.29657 0.599 0.5495
## as.factor(Eastern_alr_cat)2 0.05218 0.24883 0.210 0.8339
## as.factor(Eastern_alr_cat)3 -0.09994 0.25601 -0.390 0.6963
## as.factor(Southeastern_alr_cat)2 -0.38882 0.20392 -1.907 0.0566 .
## as.factor(Southeastern_alr_cat)3 0.07958 0.21948 0.363 0.7169
## Intro101 0.05349 0.16027 0.334 0.7386
## as.factor(age_cat)2 0.37649 0.17982 2.094 0.0363 *
## as.factor(age_cat)3 0.05978 0.22173 0.270 0.7875
## as.factor(age_cat)4 0.38458 0.32344 1.189 0.2344
## patient_sexfemale -0.31405 0.18296 -1.717 0.0861 .
## TB_RF_smokingyes -0.28605 0.19309 -1.481 0.1385
## as.factor(cough_cat)2 -0.18385 0.23603 -0.779 0.4360
## as.factor(cough_cat)3 -0.29979 0.18927 -1.584 0.1132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1006.8 on 811 degrees of freedom
## AIC: 1038.8
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# + age + patient_sex + TB_RF_smoking + symptoms_duration_cough_duration + socioeconomic_status
# do it one by one
# socioeconomic_status
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat,
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.71842 0.41195 1.744 0.0812 .
## as.factor(Western_alr_cat)2 0.18827 0.33952 0.555 0.5792
## as.factor(Western_alr_cat)3 0.28172 0.30079 0.937 0.3490
## as.factor(Western_alr_cat)4 0.13398 0.29795 0.450 0.6529
## as.factor(Eastern_alr_cat)2 0.06737 0.24964 0.270 0.7873
## as.factor(Eastern_alr_cat)3 -0.07102 0.25702 -0.276 0.7823
## as.factor(Southeastern_alr_cat)2 -0.36137 0.20600 -1.754 0.0794 .
## as.factor(Southeastern_alr_cat)3 0.15173 0.22299 0.680 0.4962
## Intro101 0.05134 0.16080 0.319 0.7495
## as.factor(age_cat)2 0.38071 0.18054 2.109 0.0350 *
## as.factor(age_cat)3 0.06618 0.22248 0.297 0.7661
## as.factor(age_cat)4 0.40480 0.32496 1.246 0.2129
## patient_sexfemale -0.26222 0.18522 -1.416 0.1569
## TB_RF_smokingyes -0.30662 0.19507 -1.572 0.1160
## as.factor(cough_cat)2 -0.16481 0.23708 -0.695 0.4869
## as.factor(cough_cat)3 -0.29879 0.19042 -1.569 0.1166
## socio_cat2 0.36679 0.17564 2.088 0.0368 *
## socio_cat3 0.34769 0.24187 1.437 0.1506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 1001.6 on 809 degrees of freedom
## AIC: 1037.6
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# do it one by one
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.086537 0.436956 2.487 0.0129 *
## as.factor(Western_alr_cat)2 0.018147 0.363150 0.050 0.9601
## as.factor(Western_alr_cat)3 0.038595 0.323091 0.119 0.9049
## as.factor(Western_alr_cat)4 -0.134194 0.320310 -0.419 0.6753
## as.factor(Eastern_alr_cat)2 0.128064 0.259859 0.493 0.6221
## as.factor(Eastern_alr_cat)3 0.055175 0.267752 0.206 0.8367
## as.factor(Southeastern_alr_cat)2 -0.471217 0.216684 -2.175 0.0297 *
## as.factor(Southeastern_alr_cat)3 0.059076 0.234298 0.252 0.8009
## Intro101 -0.006882 0.167821 -0.041 0.9673
## as.factor(age_cat)2 0.366365 0.188392 1.945 0.0518 .
## as.factor(age_cat)3 0.046685 0.232749 0.201 0.8410
## as.factor(age_cat)4 0.431431 0.340247 1.268 0.2048
## patient_sexfemale -0.244903 0.192871 -1.270 0.2042
## TB_RF_smokingyes -0.132489 0.206049 -0.643 0.5202
## as.factor(cough_cat)2 -0.149243 0.245830 -0.607 0.5438
## as.factor(cough_cat)3 -0.220901 0.197253 -1.120 0.2628
## socio_cat2 0.472632 0.185684 2.545 0.0109 *
## socio_cat3 0.295884 0.251026 1.179 0.2385
## general_examination_malnutritionyes -1.873332 0.249238 -7.516 5.64e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 939.15 on 808 degrees of freedom
## AIC: 977.15
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# do it one by one
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.06671 0.43775 2.437 0.0148 *
## as.factor(Western_alr_cat)2 0.01939 0.36356 0.053 0.9575
## as.factor(Western_alr_cat)3 0.05007 0.32370 0.155 0.8771
## as.factor(Western_alr_cat)4 -0.12293 0.32090 -0.383 0.7017
## as.factor(Eastern_alr_cat)2 0.13856 0.26040 0.532 0.5947
## as.factor(Eastern_alr_cat)3 0.05188 0.26802 0.194 0.8465
## as.factor(Southeastern_alr_cat)2 -0.46632 0.21696 -2.149 0.0316 *
## as.factor(Southeastern_alr_cat)3 0.07586 0.23526 0.322 0.7471
## Intro101 0.00385 0.16832 0.023 0.9818
## as.factor(age_cat)2 0.37088 0.18865 1.966 0.0493 *
## as.factor(age_cat)3 0.05200 0.23298 0.223 0.8234
## as.factor(age_cat)4 0.44592 0.34042 1.310 0.1902
## patient_sexfemale -0.24775 0.19304 -1.283 0.1993
## TB_RF_smokingyes -0.12296 0.20647 -0.596 0.5515
## as.factor(cough_cat)2 -0.15802 0.24615 -0.642 0.5209
## as.factor(cough_cat)3 -0.21722 0.19737 -1.101 0.2711
## socio_cat2 0.47638 0.18594 2.562 0.0104 *
## socio_cat3 0.29646 0.25117 1.180 0.2379
## general_examination_malnutritionyes -1.88471 0.24970 -7.548 4.42e-14 ***
## TB_categoryrelapse -0.65192 0.65412 -0.997 0.3189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 938.22 on 807 degrees of freedom
## AIC: 978.22
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# do it one by one
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category + Education, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education,
## family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.167458 0.444357 2.627 0.00861 **
## as.factor(Western_alr_cat)2 0.006276 0.364738 0.017 0.98627
## as.factor(Western_alr_cat)3 0.065362 0.324585 0.201 0.84041
## as.factor(Western_alr_cat)4 -0.088686 0.322586 -0.275 0.78338
## as.factor(Eastern_alr_cat)2 0.151207 0.261237 0.579 0.56272
## as.factor(Eastern_alr_cat)3 0.064851 0.269427 0.241 0.80979
## as.factor(Southeastern_alr_cat)2 -0.486446 0.218141 -2.230 0.02575 *
## as.factor(Southeastern_alr_cat)3 0.044320 0.239368 0.185 0.85311
## Intro101 0.007642 0.168652 0.045 0.96386
## as.factor(age_cat)2 0.304707 0.197869 1.540 0.12358
## as.factor(age_cat)3 -0.018197 0.239702 -0.076 0.93949
## as.factor(age_cat)4 0.398312 0.343699 1.159 0.24650
## patient_sexfemale -0.249638 0.194145 -1.286 0.19850
## TB_RF_smokingyes -0.128587 0.208658 -0.616 0.53772
## as.factor(cough_cat)2 -0.156419 0.246721 -0.634 0.52608
## as.factor(cough_cat)3 -0.223643 0.198169 -1.129 0.25909
## socio_cat2 0.480158 0.187127 2.566 0.01029 *
## socio_cat3 0.285483 0.254471 1.122 0.26192
## general_examination_malnutritionyes -1.873419 0.252732 -7.413 1.24e-13 ***
## TB_categoryrelapse -0.641757 0.652758 -0.983 0.32553
## Educationno -0.114220 0.254724 -0.448 0.65386
## Educationsecondary -0.318200 0.221965 -1.434 0.15170
## Educationuniversity 0.086127 0.416952 0.207 0.83635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.6 on 826 degrees of freedom
## Residual deviance: 935.9 on 804 degrees of freedom
## AIC: 981.9
##
## Number of Fisher Scoring iterations: 4
# add the covariates
# do it one by one
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat)+ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education +
## DR_category, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.14273 0.44427 2.572 0.0101 *
## as.factor(Western_alr_cat)2 0.01017 0.36477 0.028 0.9778
## as.factor(Western_alr_cat)3 0.07840 0.32492 0.241 0.8093
## as.factor(Western_alr_cat)4 -0.07257 0.32295 -0.225 0.8222
## as.factor(Eastern_alr_cat)2 0.15699 0.26218 0.599 0.5493
## as.factor(Eastern_alr_cat)3 0.07763 0.27054 0.287 0.7742
## as.factor(Southeastern_alr_cat)2 -0.50031 0.21867 -2.288 0.0221 *
## as.factor(Southeastern_alr_cat)3 0.04098 0.23960 0.171 0.8642
## Intro101 0.01527 0.16894 0.090 0.9280
## as.factor(age_cat)2 0.29100 0.19811 1.469 0.1419
## as.factor(age_cat)3 -0.02920 0.24028 -0.122 0.9033
## as.factor(age_cat)4 0.38147 0.34426 1.108 0.2678
## patient_sexfemale -0.25589 0.19440 -1.316 0.1881
## TB_RF_smokingyes -0.12069 0.20907 -0.577 0.5638
## as.factor(cough_cat)2 -0.15967 0.24702 -0.646 0.5180
## as.factor(cough_cat)3 -0.22468 0.19821 -1.134 0.2570
## socio_cat2 0.47462 0.18749 2.531 0.0114 *
## socio_cat3 0.28641 0.25426 1.126 0.2600
## general_examination_malnutritionyes -1.87628 0.25310 -7.413 1.23e-13 ***
## TB_categoryrelapse -0.62827 0.65276 -0.962 0.3358
## Educationno -0.11636 0.25462 -0.457 0.6477
## Educationsecondary -0.31096 0.22216 -1.400 0.1616
## Educationuniversity 0.09543 0.41745 0.229 0.8192
## DR_categoryresistant 0.46484 0.44231 1.051 0.2933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 934.73 on 803 degrees of freedom
## AIC: 982.73
##
## Number of Fisher Scoring iterations: 4
library(lmtest)
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ as.factor(cough_cat) + socio_cat+ general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
model2 <- glm(TB_score_2cat ~ Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ as.factor(cough_cat) + socio_cat+ general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education +
## DR_category, family = binomial, data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.14273 0.44427 2.572 0.0101 *
## as.factor(Western_alr_cat)2 0.01017 0.36477 0.028 0.9778
## as.factor(Western_alr_cat)3 0.07840 0.32492 0.241 0.8093
## as.factor(Western_alr_cat)4 -0.07257 0.32295 -0.225 0.8222
## as.factor(Eastern_alr_cat)2 0.15699 0.26218 0.599 0.5493
## as.factor(Eastern_alr_cat)3 0.07763 0.27054 0.287 0.7742
## as.factor(Southeastern_alr_cat)2 -0.50031 0.21867 -2.288 0.0221 *
## as.factor(Southeastern_alr_cat)3 0.04098 0.23960 0.171 0.8642
## Intro101 0.01527 0.16894 0.090 0.9280
## as.factor(age_cat)2 0.29100 0.19811 1.469 0.1419
## as.factor(age_cat)3 -0.02920 0.24028 -0.122 0.9033
## as.factor(age_cat)4 0.38147 0.34426 1.108 0.2678
## patient_sexfemale -0.25589 0.19440 -1.316 0.1881
## TB_RF_smokingyes -0.12069 0.20907 -0.577 0.5638
## as.factor(cough_cat)2 -0.15967 0.24702 -0.646 0.5180
## as.factor(cough_cat)3 -0.22468 0.19821 -1.134 0.2570
## socio_cat2 0.47462 0.18749 2.531 0.0114 *
## socio_cat3 0.28641 0.25426 1.126 0.2600
## general_examination_malnutritionyes -1.87628 0.25310 -7.413 1.23e-13 ***
## TB_categoryrelapse -0.62827 0.65276 -0.962 0.3358
## Educationno -0.11636 0.25462 -0.457 0.6477
## Educationsecondary -0.31096 0.22216 -1.400 0.1616
## Educationuniversity 0.09543 0.41745 0.229 0.8192
## DR_categoryresistant 0.46484 0.44231 1.051 0.2933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 934.73 on 803 degrees of freedom
## AIC: 982.73
##
## Number of Fisher Scoring iterations: 4
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education +
## DR_category
## Model 2: TB_score_2cat ~ Intro10 + as.factor(age_cat) + patient_sex +
## TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition +
## TB_category + Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 24 -467.36
## 2 17 -472.71 -7 10.698 0.1523
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 3.14 1.33 7.59
## as.factor(Western_alr_cat)2 1.01 0.49 2.05
## as.factor(Western_alr_cat)3 1.08 0.56 2.02
## as.factor(Western_alr_cat)4 0.93 0.49 1.73
## as.factor(Eastern_alr_cat)2 1.17 0.69 1.95
## as.factor(Eastern_alr_cat)3 1.08 0.63 1.83
## as.factor(Southeastern_alr_cat)2 0.61 0.39 0.93
## as.factor(Southeastern_alr_cat)3 1.04 0.65 1.66
## Intro101 1.02 0.73 1.42
## as.factor(age_cat)2 1.34 0.91 1.98
## as.factor(age_cat)3 0.97 0.61 1.56
## as.factor(age_cat)4 1.46 0.76 2.94
## patient_sexfemale 0.77 0.53 1.13
## TB_RF_smokingyes 0.89 0.59 1.34
## as.factor(cough_cat)2 0.85 0.53 1.39
## as.factor(cough_cat)3 0.80 0.54 1.17
## socio_cat2 1.61 1.12 2.33
## socio_cat3 1.33 0.82 2.21
## general_examination_malnutritionyes 0.15 0.09 0.25
## TB_categoryrelapse 0.53 0.15 2.12
## Educationno 0.89 0.54 1.48
## Educationsecondary 0.73 0.48 1.14
## Educationuniversity 1.10 0.50 2.62
## DR_categoryresistant 1.59 0.70 4.04
library(lmtest)
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat)* Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat)* Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) * Intro10 +
## as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + general_examination_malnutrition +
## TB_category + Education + DR_category, family = binomial,
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.46849 0.53349 2.753 0.00591
## as.factor(Western_alr_cat)2 -0.28675 0.46376 -0.618 0.53636
## as.factor(Western_alr_cat)3 -0.07394 0.42219 -0.175 0.86098
## as.factor(Western_alr_cat)4 -0.29555 0.41773 -0.708 0.47925
## Intro101 -0.86340 0.73782 -1.170 0.24192
## as.factor(Eastern_alr_cat)2 0.05067 0.32837 0.154 0.87737
## as.factor(Eastern_alr_cat)3 -0.07201 0.34529 -0.209 0.83480
## as.factor(Southeastern_alr_cat)2 -0.55587 0.27225 -2.042 0.04118
## as.factor(Southeastern_alr_cat)3 0.04079 0.29617 0.138 0.89047
## as.factor(age_cat)2 0.28276 0.19927 1.419 0.15591
## as.factor(age_cat)3 -0.02029 0.24114 -0.084 0.93294
## as.factor(age_cat)4 0.38609 0.34616 1.115 0.26470
## patient_sexfemale -0.25779 0.19549 -1.319 0.18727
## TB_RF_smokingyes -0.10814 0.21008 -0.515 0.60672
## as.factor(cough_cat)2 -0.16449 0.24849 -0.662 0.50799
## as.factor(cough_cat)3 -0.22390 0.19932 -1.123 0.26130
## socio_cat2 0.48885 0.18837 2.595 0.00945
## socio_cat3 0.28663 0.25560 1.121 0.26211
## general_examination_malnutritionyes -1.88138 0.25416 -7.402 1.34e-13
## TB_categoryrelapse -0.60959 0.65664 -0.928 0.35323
## Educationno -0.10578 0.25685 -0.412 0.68045
## Educationsecondary -0.31445 0.22406 -1.403 0.16050
## Educationuniversity 0.06849 0.41872 0.164 0.87006
## DR_categoryresistant 0.46243 0.44616 1.036 0.29999
## as.factor(Western_alr_cat)2:Intro101 0.83716 0.77162 1.085 0.27795
## as.factor(Western_alr_cat)3:Intro101 0.33895 0.66914 0.507 0.61248
## as.factor(Western_alr_cat)4:Intro101 0.55788 0.65876 0.847 0.39707
## Intro101:as.factor(Eastern_alr_cat)2 0.24273 0.55593 0.437 0.66239
## Intro101:as.factor(Eastern_alr_cat)3 0.37890 0.56201 0.674 0.50019
## Intro101:as.factor(Southeastern_alr_cat)2 0.25745 0.45792 0.562 0.57398
## Intro101:as.factor(Southeastern_alr_cat)3 0.04920 0.48695 0.101 0.91952
##
## (Intercept) **
## as.factor(Western_alr_cat)2
## as.factor(Western_alr_cat)3
## as.factor(Western_alr_cat)4
## Intro101
## as.factor(Eastern_alr_cat)2
## as.factor(Eastern_alr_cat)3
## as.factor(Southeastern_alr_cat)2 *
## as.factor(Southeastern_alr_cat)3
## as.factor(age_cat)2
## as.factor(age_cat)3
## as.factor(age_cat)4
## patient_sexfemale
## TB_RF_smokingyes
## as.factor(cough_cat)2
## as.factor(cough_cat)3
## socio_cat2 **
## socio_cat3
## general_examination_malnutritionyes ***
## TB_categoryrelapse
## Educationno
## Educationsecondary
## Educationuniversity
## DR_categoryresistant
## as.factor(Western_alr_cat)2:Intro101
## as.factor(Western_alr_cat)3:Intro101
## as.factor(Western_alr_cat)4:Intro101
## Intro101:as.factor(Eastern_alr_cat)2
## Intro101:as.factor(Eastern_alr_cat)3
## Intro101:as.factor(Southeastern_alr_cat)2
## Intro101:as.factor(Southeastern_alr_cat)3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 932.14 on 796 degrees of freedom
## AIC: 994.14
##
## Number of Fisher Scoring iterations: 4
# compare to a model without the interaction
model2 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat)+ as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ as.factor(cough_cat) + socio_cat+ general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: TB_score_2cat ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education +
## DR_category
## Model 2: TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## general_examination_malnutrition + TB_category + Education +
## DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 31 -466.07
## 2 24 -467.36 -7 2.583 0.9207
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 4.34 1.56 12.69
## as.factor(Western_alr_cat)2 0.75 0.30 1.84
## as.factor(Western_alr_cat)3 0.93 0.39 2.08
## as.factor(Western_alr_cat)4 0.74 0.32 1.65
## Intro101 0.42 0.10 1.81
## as.factor(Eastern_alr_cat)2 1.05 0.54 1.98
## as.factor(Eastern_alr_cat)3 0.93 0.47 1.81
## as.factor(Southeastern_alr_cat)2 0.57 0.33 0.97
## as.factor(Southeastern_alr_cat)3 1.04 0.58 1.86
## as.factor(age_cat)2 1.33 0.90 1.96
## as.factor(age_cat)3 0.98 0.61 1.58
## as.factor(age_cat)4 1.47 0.76 2.97
## patient_sexfemale 0.77 0.53 1.14
## TB_RF_smokingyes 0.90 0.60 1.36
## as.factor(cough_cat)2 0.85 0.52 1.38
## as.factor(cough_cat)3 0.80 0.54 1.18
## socio_cat2 1.63 1.13 2.37
## socio_cat3 1.33 0.81 2.22
## general_examination_malnutritionyes 0.15 0.09 0.25
## TB_categoryrelapse 0.54 0.15 2.18
## Educationno 0.90 0.55 1.50
## Educationsecondary 0.73 0.47 1.14
## Educationuniversity 1.07 0.49 2.56
## DR_categoryresistant 1.59 0.69 4.06
## as.factor(Western_alr_cat)2:Intro101 2.31 0.51 10.65
## as.factor(Western_alr_cat)3:Intro101 1.40 0.38 5.25
## as.factor(Western_alr_cat)4:Intro101 1.75 0.48 6.39
## Intro101:as.factor(Eastern_alr_cat)2 1.27 0.42 3.78
## Intro101:as.factor(Eastern_alr_cat)3 1.46 0.48 4.39
## Intro101:as.factor(Southeastern_alr_cat)2 1.29 0.52 3.17
## Intro101:as.factor(Southeastern_alr_cat)3 1.05 0.40 2.72
–> no it does not
library(lmtest)
model1 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat)* Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat)* Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ socio_cat + general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
summary(model1)
##
## Call:
## glm(formula = TB_score_2cat ~ as.factor(Western_alr_cat) * Intro10 +
## as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking +
## socio_cat + general_examination_malnutrition + TB_category +
## Education + DR_category, family = binomial, data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.29477 0.50706 2.553 0.01067
## as.factor(Western_alr_cat)2 -0.28334 0.46418 -0.610 0.54159
## as.factor(Western_alr_cat)3 -0.08695 0.42276 -0.206 0.83704
## as.factor(Western_alr_cat)4 -0.29141 0.41859 -0.696 0.48633
## Intro101 -0.89491 0.73870 -1.211 0.22572
## as.factor(Eastern_alr_cat)2 0.05466 0.32786 0.167 0.86760
## as.factor(Eastern_alr_cat)3 -0.06192 0.34453 -0.180 0.85737
## as.factor(Southeastern_alr_cat)2 -0.53776 0.27134 -1.982 0.04750
## as.factor(Southeastern_alr_cat)3 0.05851 0.29541 0.198 0.84299
## as.factor(age_cat)2 0.28057 0.19918 1.409 0.15895
## as.factor(age_cat)3 -0.02518 0.24041 -0.105 0.91657
## as.factor(age_cat)4 0.37966 0.34563 1.098 0.27200
## patient_sexfemale -0.26135 0.19516 -1.339 0.18051
## TB_RF_smokingyes -0.09774 0.20954 -0.466 0.64088
## socio_cat2 0.48509 0.18802 2.580 0.00988
## socio_cat3 0.30416 0.25508 1.192 0.23310
## general_examination_malnutritionyes -1.89609 0.25351 -7.479 7.46e-14
## TB_categoryrelapse -0.62036 0.65347 -0.949 0.34245
## Educationno -0.10148 0.25629 -0.396 0.69215
## Educationsecondary -0.30756 0.22392 -1.374 0.16959
## Educationuniversity 0.09174 0.41773 0.220 0.82617
## DR_categoryresistant 0.46382 0.44616 1.040 0.29854
## as.factor(Western_alr_cat)2:Intro101 0.87734 0.76999 1.139 0.25453
## as.factor(Western_alr_cat)3:Intro101 0.38940 0.66867 0.582 0.56033
## as.factor(Western_alr_cat)4:Intro101 0.58315 0.65914 0.885 0.37631
## Intro101:as.factor(Eastern_alr_cat)2 0.22380 0.55489 0.403 0.68672
## Intro101:as.factor(Eastern_alr_cat)3 0.36973 0.56070 0.659 0.50963
## Intro101:as.factor(Southeastern_alr_cat)2 0.25430 0.45769 0.556 0.57848
## Intro101:as.factor(Southeastern_alr_cat)3 0.04665 0.48649 0.096 0.92361
##
## (Intercept) *
## as.factor(Western_alr_cat)2
## as.factor(Western_alr_cat)3
## as.factor(Western_alr_cat)4
## Intro101
## as.factor(Eastern_alr_cat)2
## as.factor(Eastern_alr_cat)3
## as.factor(Southeastern_alr_cat)2 *
## as.factor(Southeastern_alr_cat)3
## as.factor(age_cat)2
## as.factor(age_cat)3
## as.factor(age_cat)4
## patient_sexfemale
## TB_RF_smokingyes
## socio_cat2 **
## socio_cat3
## general_examination_malnutritionyes ***
## TB_categoryrelapse
## Educationno
## Educationsecondary
## Educationuniversity
## DR_categoryresistant
## as.factor(Western_alr_cat)2:Intro101
## as.factor(Western_alr_cat)3:Intro101
## as.factor(Western_alr_cat)4:Intro101
## Intro101:as.factor(Eastern_alr_cat)2
## Intro101:as.factor(Eastern_alr_cat)3
## Intro101:as.factor(Southeastern_alr_cat)2
## Intro101:as.factor(Southeastern_alr_cat)3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1026.59 on 826 degrees of freedom
## Residual deviance: 933.42 on 798 degrees of freedom
## AIC: 991.42
##
## Number of Fisher Scoring iterations: 4
# compare to a model without the interaction
model2 <- glm(TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat)+ as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) + patient_sex + TB_RF_smoking+ socio_cat+ general_examination_malnutrition + TB_category + Education + DR_category, data = clean, family = binomial, na.action = na.exclude)
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: TB_score_2cat ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + socio_cat + general_examination_malnutrition +
## TB_category + Education + DR_category
## Model 2: TB_score_2cat ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + as.factor(age_cat) +
## patient_sex + TB_RF_smoking + socio_cat + general_examination_malnutrition +
## TB_category + Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 29 -466.71
## 2 22 -468.01 -7 2.6048 0.919
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 3.65 1.38 10.15
## as.factor(Western_alr_cat)2 0.75 0.30 1.85
## as.factor(Western_alr_cat)3 0.92 0.39 2.06
## as.factor(Western_alr_cat)4 0.75 0.32 1.66
## Intro101 0.41 0.10 1.75
## as.factor(Eastern_alr_cat)2 1.06 0.55 1.99
## as.factor(Eastern_alr_cat)3 0.94 0.47 1.83
## as.factor(Southeastern_alr_cat)2 0.58 0.34 0.99
## as.factor(Southeastern_alr_cat)3 1.06 0.59 1.89
## as.factor(age_cat)2 1.32 0.90 1.96
## as.factor(age_cat)3 0.98 0.61 1.57
## as.factor(age_cat)4 1.46 0.76 2.95
## patient_sexfemale 0.77 0.53 1.13
## TB_RF_smokingyes 0.91 0.60 1.37
## socio_cat2 1.62 1.13 2.36
## socio_cat3 1.36 0.83 2.26
## general_examination_malnutritionyes 0.15 0.09 0.24
## TB_categoryrelapse 0.54 0.15 2.14
## Educationno 0.90 0.55 1.51
## Educationsecondary 0.74 0.48 1.14
## Educationuniversity 1.10 0.50 2.61
## DR_categoryresistant 1.59 0.69 4.07
## as.factor(Western_alr_cat)2:Intro101 2.40 0.53 11.05
## as.factor(Western_alr_cat)3:Intro101 1.48 0.40 5.51
## as.factor(Western_alr_cat)4:Intro101 1.79 0.49 6.56
## Intro101:as.factor(Eastern_alr_cat)2 1.25 0.42 3.70
## Intro101:as.factor(Eastern_alr_cat)3 1.45 0.48 4.34
## Intro101:as.factor(Southeastern_alr_cat)2 1.29 0.52 3.15
## Intro101:as.factor(Southeastern_alr_cat)3 1.05 0.40 2.71
# create a clean dataset with no missing values for any of the ancestries, bacterial genotype, and the covariates included and it only comprises HIV negative patients
clean <- na.omit(new_data[new_data$HIV_status == "negative", c("Western_alr_cat", "Southeastern_alr_cat", "Eastern_alr_cat", "patient_sex", "age_cat", "symptoms_duration_cough_duration", "Intro10", "TB_RF_smoking", "Ct_value", "cough_cat", "socio_cat", "general_examination_malnutrition", "TB_category", "Education", "DR_category")])
dim(clean)
## [1] 472 15
# Linearity
# for the ancestries as category
model1 <- glm(Ct_value ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat), data = clean, na.action = na.exclude, family = "gaussian")
par(mfrow = c(1,3))
for (anc in c("Western_alr_cat", "Eastern_alr_cat", "Southeastern_alr_cat")){
boxplot(residuals(model1) ~ clean[[anc]], main = paste("Residuals vs ", anc))
abline(h = 0, col = "red")}
# Homoscedasticity
plot(model1, which = 1)
# Normality of Residuals
# QQ-Plot
qqnorm(residuals(model1))
qqline(residuals(model1), col = "red")
# Histogram of residuals
hist(residuals(model1), main = "Histogram of residuals", xlab = "Residuals")
–> the qq-plot does not look good I would say.
# Linearity
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat), data = clean, na.action = na.exclude, family = "gaussian")
par(mfrow = c(1,3))
for (anc in c("Western_alr_cat", "Eastern_alr_cat", "Southeastern_alr_cat")){
boxplot(residuals(model1) ~ clean[[anc]], main = paste("Residuals vs ", anc))
abline(h = 0, col = "red")}
# Homoscedasticity
plot(model1, which = 1)
# Normality of Residuals
# QQ-Plot
qqnorm(residuals(model1))
qqline(residuals(model1), col = "red")
# Histogram of residuals
hist(residuals(model1), main = "Histogram of residuals", xlab = "Residuals")
–> The qq-plot looks much better
library(lmtest)
# Linearity
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat), data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat),
## family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2313393 0.0205035 60.055 <2e-16 ***
## as.factor(Western_alr_cat)2 0.0343062 0.0208663 1.644 0.101
## as.factor(Western_alr_cat)3 0.0308501 0.0187430 1.646 0.100
## as.factor(Western_alr_cat)4 0.0226902 0.0186398 1.217 0.224
## as.factor(Eastern_alr_cat)2 -0.0047696 0.0149368 -0.319 0.750
## as.factor(Eastern_alr_cat)3 0.0147742 0.0153897 0.960 0.338
## as.factor(Southeastern_alr_cat)2 0.0144342 0.0124202 1.162 0.246
## as.factor(Southeastern_alr_cat)3 0.0008782 0.0127372 0.069 0.945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009729422)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.5145 on 464 degrees of freedom
## AIC: -837.18
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# Linearity
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10, data = clean, na.action = na.exclude, family = "gaussian")
model2 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat), data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10, family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2382529 0.0207702 59.617 <2e-16 ***
## as.factor(Western_alr_cat)2 0.0336703 0.0208111 1.618 0.1064
## as.factor(Western_alr_cat)3 0.0305594 0.0186916 1.635 0.1027
## as.factor(Western_alr_cat)4 0.0210707 0.0186078 1.132 0.2581
## as.factor(Eastern_alr_cat)2 -0.0049609 0.0148957 -0.333 0.7393
## as.factor(Eastern_alr_cat)3 0.0168406 0.0153858 1.095 0.2743
## as.factor(Southeastern_alr_cat)2 0.0145739 0.0123860 1.177 0.2399
## as.factor(Southeastern_alr_cat)3 0.0006072 0.0127027 0.048 0.9619
## Intro101 -0.0178019 0.0094024 -1.893 0.0589 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009675524)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.4798 on 463 degrees of freedom
## AIC: -838.82
##
## Number of Fisher Scoring iterations: 2
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10
## Model 2: log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 10 429.41
## 2 9 427.59 -1 3.6403 0.0564 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
# sex
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex, family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2365419 0.0211833 58.374 <2e-16 ***
## as.factor(Western_alr_cat)2 0.0338272 0.0208330 1.624 0.1051
## as.factor(Western_alr_cat)3 0.0306377 0.0187092 1.638 0.1022
## as.factor(Western_alr_cat)4 0.0213706 0.0186380 1.147 0.2521
## as.factor(Eastern_alr_cat)2 -0.0046009 0.0149335 -0.308 0.7582
## as.factor(Eastern_alr_cat)3 0.0169047 0.0154002 1.098 0.2729
## as.factor(Southeastern_alr_cat)2 0.0147696 0.0124058 1.191 0.2344
## as.factor(Southeastern_alr_cat)3 0.0006629 0.0127147 0.052 0.9584
## Intro101 -0.0176160 0.0094212 -1.870 0.0621 .
## patient_sexfemale 0.0042923 0.0102094 0.420 0.6744
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009692759)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.4781 on 462 degrees of freedom
## AIC: -837
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# age
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat), data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat), family = "gaussian",
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2368544 0.0217637 56.831 <2e-16 ***
## as.factor(Western_alr_cat)2 0.0340527 0.0208940 1.630 0.104
## as.factor(Western_alr_cat)3 0.0306916 0.0187777 1.634 0.103
## as.factor(Western_alr_cat)4 0.0212797 0.0187192 1.137 0.256
## as.factor(Eastern_alr_cat)2 -0.0048509 0.0149732 -0.324 0.746
## as.factor(Eastern_alr_cat)3 0.0168910 0.0154657 1.092 0.275
## as.factor(Southeastern_alr_cat)2 0.0147314 0.0124421 1.184 0.237
## as.factor(Southeastern_alr_cat)3 0.0006154 0.0127598 0.048 0.962
## Intro101 -0.0169224 0.0094836 -1.784 0.075 .
## patient_sexfemale 0.0046668 0.0104160 0.448 0.654
## as.factor(age_cat)2 -0.0016045 0.0106393 -0.151 0.880
## as.factor(age_cat)3 -0.0055395 0.0134709 -0.411 0.681
## as.factor(age_cat)4 0.0113204 0.0180264 0.628 0.530
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009740243)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.4708 on 459 degrees of freedom
## AIC: -831.77
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# smoking
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking,
## family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2387850 0.0218015 56.821 <2e-16 ***
## as.factor(Western_alr_cat)2 0.0331143 0.0208928 1.585 0.1137
## as.factor(Western_alr_cat)3 0.0311098 0.0187677 1.658 0.0981 .
## as.factor(Western_alr_cat)4 0.0208751 0.0187092 1.116 0.2651
## as.factor(Eastern_alr_cat)2 -0.0033005 0.0150123 -0.220 0.8261
## as.factor(Eastern_alr_cat)3 0.0187428 0.0155232 1.207 0.2279
## as.factor(Southeastern_alr_cat)2 0.0151121 0.0124372 1.215 0.2250
## as.factor(Southeastern_alr_cat)3 0.0007898 0.0127519 0.062 0.9506
## Intro101 -0.0156806 0.0095271 -1.646 0.1005
## patient_sexfemale -0.0003614 0.0111307 -0.032 0.9741
## as.factor(age_cat)2 0.0003732 0.0107446 0.035 0.9723
## as.factor(age_cat)3 -0.0017997 0.0137775 -0.131 0.8961
## as.factor(age_cat)4 0.0139664 0.0181333 0.770 0.4416
## TB_RF_smokingyes -0.0146135 0.0114599 -1.275 0.2029
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009726974)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.4550 on 458 degrees of freedom
## AIC: -831.44
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# cough duration
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat), data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat), family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.273618 0.023149 55.019 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.026432 0.020672 1.279 0.201681
## as.factor(Western_alr_cat)3 0.025369 0.018540 1.368 0.171862
## as.factor(Western_alr_cat)4 0.014567 0.018497 0.788 0.431389
## as.factor(Eastern_alr_cat)2 -0.001649 0.014790 -0.112 0.911253
## as.factor(Eastern_alr_cat)3 0.017237 0.015300 1.127 0.260509
## as.factor(Southeastern_alr_cat)2 0.011551 0.012287 0.940 0.347675
## as.factor(Southeastern_alr_cat)3 -0.002972 0.012594 -0.236 0.813551
## Intro101 -0.010700 0.009464 -1.131 0.258849
## patient_sexfemale -0.001087 0.010965 -0.099 0.921061
## as.factor(age_cat)2 0.000589 0.010586 0.056 0.955655
## as.factor(age_cat)3 -0.001074 0.013570 -0.079 0.936949
## as.factor(age_cat)4 0.015888 0.017865 0.889 0.374277
## TB_RF_smokingyes -0.017050 0.011308 -1.508 0.132300
## as.factor(cough_cat)2 -0.040415 0.013141 -3.076 0.002227 **
## as.factor(cough_cat)3 -0.039724 0.010471 -3.794 0.000168 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009433898)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.3019 on 456 degrees of freedom
## AIC: -843.95
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# socioeconomic status
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat, family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2710949 0.0241330 52.670 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0268639 0.0207674 1.294 0.196475
## as.factor(Western_alr_cat)3 0.0254099 0.0185765 1.368 0.172037
## as.factor(Western_alr_cat)4 0.0142943 0.0185401 0.771 0.441112
## as.factor(Eastern_alr_cat)2 -0.0013832 0.0148299 -0.093 0.925727
## as.factor(Eastern_alr_cat)3 0.0175194 0.0153576 1.141 0.254569
## as.factor(Southeastern_alr_cat)2 0.0121944 0.0123914 0.984 0.325589
## as.factor(Southeastern_alr_cat)3 -0.0021835 0.0127821 -0.171 0.864440
## Intro101 -0.0105840 0.0094853 -1.116 0.265086
## patient_sexfemale -0.0006988 0.0110128 -0.063 0.949430
## as.factor(age_cat)2 0.0009028 0.0106319 0.085 0.932365
## as.factor(age_cat)3 -0.0006416 0.0136224 -0.047 0.962457
## as.factor(age_cat)4 0.0155325 0.0179655 0.865 0.387730
## TB_RF_smokingyes -0.0181509 0.0115342 -1.574 0.116263
## as.factor(cough_cat)2 -0.0405626 0.0131690 -3.080 0.002195 **
## as.factor(cough_cat)3 -0.0395175 0.0105171 -3.757 0.000194 ***
## socio_cat2 0.0019392 0.0100697 0.193 0.847376
## socio_cat3 0.0072633 0.0142476 0.510 0.610447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009470011)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2994 on 454 degrees of freedom
## AIC: -840.22
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# socioeconomic status
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + general_examination_malnutrition,
## family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2700377 0.0243609 52.134 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0273308 0.0208341 1.312 0.19024
## as.factor(Western_alr_cat)3 0.0263375 0.0187988 1.401 0.16189
## as.factor(Western_alr_cat)4 0.0152098 0.0187574 0.811 0.41787
## as.factor(Eastern_alr_cat)2 -0.0014524 0.0148459 -0.098 0.92211
## as.factor(Eastern_alr_cat)3 0.0173089 0.0153854 1.125 0.26118
## as.factor(Southeastern_alr_cat)2 0.0123161 0.0124088 0.993 0.32147
## as.factor(Southeastern_alr_cat)3 -0.0021354 0.0127954 -0.167 0.86754
## Intro101 -0.0104528 0.0095026 -1.100 0.27192
## patient_sexfemale -0.0009079 0.0110412 -0.082 0.93450
## as.factor(age_cat)2 0.0010626 0.0106529 0.100 0.92059
## as.factor(age_cat)3 -0.0006036 0.0136362 -0.044 0.96471
## as.factor(age_cat)4 0.0156984 0.0179898 0.873 0.38333
## TB_RF_smokingyes -0.0186220 0.0116304 -1.601 0.11004
## as.factor(cough_cat)2 -0.0405017 0.0131832 -3.072 0.00225 **
## as.factor(cough_cat)3 -0.0396298 0.0105327 -3.763 0.00019 ***
## socio_cat2 0.0018717 0.0100815 0.186 0.85279
## socio_cat3 0.0074621 0.0142738 0.523 0.60138
## general_examination_malnutritionyes 0.0059837 0.0178178 0.336 0.73716
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009488554)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2983 on 453 degrees of freedom
## AIC: -838.33
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# socioeconomic status
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + general_examination_malnutrition +
## TB_category, family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2702105 0.0244139 52.028 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0273058 0.0208573 1.309 0.191142
## as.factor(Western_alr_cat)3 0.0261936 0.0188430 1.390 0.165183
## as.factor(Western_alr_cat)4 0.0151269 0.0187857 0.805 0.421108
## as.factor(Eastern_alr_cat)2 -0.0015469 0.0148750 -0.104 0.917222
## as.factor(Eastern_alr_cat)3 0.0173400 0.0154034 1.126 0.260879
## as.factor(Southeastern_alr_cat)2 0.0122626 0.0124272 0.987 0.324289
## as.factor(Southeastern_alr_cat)3 -0.0022721 0.0128410 -0.177 0.859632
## Intro101 -0.0105080 0.0095199 -1.104 0.270271
## patient_sexfemale -0.0008919 0.0110536 -0.081 0.935728
## as.factor(age_cat)2 0.0010334 0.0106661 0.097 0.922857
## as.factor(age_cat)3 -0.0006048 0.0136510 -0.044 0.964679
## as.factor(age_cat)4 0.0154763 0.0180688 0.857 0.392163
## TB_RF_smokingyes -0.0186900 0.0116516 -1.604 0.109397
## as.factor(cough_cat)2 -0.0404355 0.0132046 -3.062 0.002328 **
## as.factor(cough_cat)3 -0.0396875 0.0105510 -3.761 0.000191 ***
## socio_cat2 0.0018660 0.0100925 0.185 0.853399
## socio_cat3 0.0074870 0.0142902 0.524 0.600585
## general_examination_malnutritionyes 0.0060648 0.0178451 0.340 0.734120
## TB_categoryrelapse 0.0053686 0.0354307 0.152 0.879630
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009509063)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2981 on 452 degrees of freedom
## AIC: -836.36
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# socioeconomic status
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category + Education, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + general_examination_malnutrition +
## TB_category + Education, family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2702072 0.0247740 51.272 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0270033 0.0209049 1.292 0.197119
## as.factor(Western_alr_cat)3 0.0261484 0.0188619 1.386 0.166342
## as.factor(Western_alr_cat)4 0.0151208 0.0188271 0.803 0.422318
## as.factor(Eastern_alr_cat)2 -0.0013482 0.0148639 -0.091 0.927771
## as.factor(Eastern_alr_cat)3 0.0162746 0.0154108 1.056 0.291512
## as.factor(Southeastern_alr_cat)2 0.0122515 0.0124475 0.984 0.325518
## as.factor(Southeastern_alr_cat)3 -0.0009383 0.0129520 -0.072 0.942283
## Intro101 -0.0096428 0.0095276 -1.012 0.312038
## patient_sexfemale -0.0008778 0.0111061 -0.079 0.937041
## as.factor(age_cat)2 0.0039443 0.0112296 0.351 0.725571
## as.factor(age_cat)3 0.0010962 0.0139612 0.079 0.937449
## as.factor(age_cat)4 0.0175929 0.0182754 0.963 0.336238
## TB_RF_smokingyes -0.0159454 0.0117503 -1.357 0.175456
## as.factor(cough_cat)2 -0.0396451 0.0132018 -3.003 0.002823 **
## as.factor(cough_cat)3 -0.0391892 0.0105471 -3.716 0.000228 ***
## socio_cat2 0.0010120 0.0101636 0.100 0.920730
## socio_cat3 0.0038285 0.0144479 0.265 0.791140
## general_examination_malnutritionyes 0.0102834 0.0179618 0.573 0.567261
## TB_categoryrelapse 0.0049328 0.0354320 0.139 0.889340
## Educationno -0.0239194 0.0142524 -1.678 0.093989 .
## Educationsecondary -0.0019552 0.0123818 -0.158 0.874601
## Educationuniversity 0.0188758 0.0219645 0.859 0.390592
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009491995)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2619 on 449 degrees of freedom
## AIC: -834.35
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# socioeconomic status
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat + general_examination_malnutrition + TB_category + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + general_examination_malnutrition +
## TB_category + Education + DR_category, family = "gaussian",
## data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2701163 0.0248051 51.204 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0271249 0.0209368 1.296 0.195795
## as.factor(Western_alr_cat)3 0.0260723 0.0188863 1.380 0.168124
## as.factor(Western_alr_cat)4 0.0150857 0.0188482 0.800 0.423918
## as.factor(Eastern_alr_cat)2 -0.0013131 0.0148809 -0.088 0.929723
## as.factor(Eastern_alr_cat)3 0.0162548 0.0154277 1.054 0.292628
## as.factor(Southeastern_alr_cat)2 0.0124595 0.0125072 0.996 0.319699
## as.factor(Southeastern_alr_cat)3 -0.0008475 0.0129744 -0.065 0.947949
## Intro101 -0.0096538 0.0095380 -1.012 0.312014
## patient_sexfemale -0.0008534 0.0111187 -0.077 0.938856
## as.factor(age_cat)2 0.0040954 0.0112688 0.363 0.716457
## as.factor(age_cat)3 0.0012880 0.0140114 0.092 0.926800
## as.factor(age_cat)4 0.0178601 0.0183472 0.973 0.330854
## TB_RF_smokingyes -0.0161377 0.0118049 -1.367 0.172301
## as.factor(cough_cat)2 -0.0395393 0.0132273 -2.989 0.002951 **
## as.factor(cough_cat)3 -0.0391286 0.0105631 -3.704 0.000238 ***
## socio_cat2 0.0011549 0.0102014 0.113 0.909912
## socio_cat3 0.0038470 0.0144637 0.266 0.790379
## general_examination_malnutritionyes 0.0101227 0.0180003 0.562 0.574150
## TB_categoryrelapse 0.0047640 0.0354808 0.134 0.893251
## Educationno -0.0239086 0.0142678 -1.676 0.094494 .
## Educationsecondary -0.0019037 0.0123979 -0.154 0.878037
## Educationuniversity 0.0188650 0.0219882 0.858 0.391372
## DR_categoryresistant -0.0050409 0.0260864 -0.193 0.846860
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009512389)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2616 on 448 degrees of freedom
## AIC: -832.39
##
## Number of Fisher Scoring iterations: 2
library(lmtest)
# for the ancestries as category
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
model2 <- glm(log10(Ct_value) ~ Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) + socio_cat+ TB_category + general_examination_malnutrition + Education +DR_category, data = clean, na.action = na.exclude, family = "gaussian")
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + patient_sex +
## as.factor(age_cat) + TB_RF_smoking + as.factor(cough_cat) +
## socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## Model 2: log10(Ct_value) ~ Intro10 + patient_sex + as.factor(age_cat) +
## TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category +
## general_examination_malnutrition + Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 25 441.19
## 2 18 436.80 -7 8.7901 0.2681
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) +
## as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) +
## Intro10 + patient_sex + as.factor(age_cat) + TB_RF_smoking +
## as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category, family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2701163 0.0248051 51.204 < 2e-16 ***
## as.factor(Western_alr_cat)2 0.0271249 0.0209368 1.296 0.195795
## as.factor(Western_alr_cat)3 0.0260723 0.0188863 1.380 0.168124
## as.factor(Western_alr_cat)4 0.0150857 0.0188482 0.800 0.423918
## as.factor(Eastern_alr_cat)2 -0.0013131 0.0148809 -0.088 0.929723
## as.factor(Eastern_alr_cat)3 0.0162548 0.0154277 1.054 0.292628
## as.factor(Southeastern_alr_cat)2 0.0124595 0.0125072 0.996 0.319699
## as.factor(Southeastern_alr_cat)3 -0.0008475 0.0129744 -0.065 0.947949
## Intro101 -0.0096538 0.0095380 -1.012 0.312014
## patient_sexfemale -0.0008534 0.0111187 -0.077 0.938856
## as.factor(age_cat)2 0.0040954 0.0112688 0.363 0.716457
## as.factor(age_cat)3 0.0012880 0.0140114 0.092 0.926800
## as.factor(age_cat)4 0.0178601 0.0183472 0.973 0.330854
## TB_RF_smokingyes -0.0161377 0.0118049 -1.367 0.172301
## as.factor(cough_cat)2 -0.0395393 0.0132273 -2.989 0.002951 **
## as.factor(cough_cat)3 -0.0391286 0.0105631 -3.704 0.000238 ***
## socio_cat2 0.0011549 0.0102014 0.113 0.909912
## socio_cat3 0.0038470 0.0144637 0.266 0.790379
## TB_categoryrelapse 0.0047640 0.0354808 0.134 0.893251
## general_examination_malnutritionyes 0.0101227 0.0180003 0.562 0.574150
## Educationno -0.0239086 0.0142678 -1.676 0.094494 .
## Educationsecondary -0.0019037 0.0123979 -0.154 0.878037
## Educationuniversity 0.0188650 0.0219882 0.858 0.391372
## DR_categoryresistant -0.0050409 0.0260864 -0.193 0.846860
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009512389)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2616 on 448 degrees of freedom
## AIC: -832.39
##
## Number of Fisher Scoring iterations: 2
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 3.56 3.39 3.74
## as.factor(Western_alr_cat)2 1.03 0.99 1.07
## as.factor(Western_alr_cat)3 1.03 0.99 1.07
## as.factor(Western_alr_cat)4 1.02 0.98 1.05
## as.factor(Eastern_alr_cat)2 1.00 0.97 1.03
## as.factor(Eastern_alr_cat)3 1.02 0.99 1.05
## as.factor(Southeastern_alr_cat)2 1.01 0.99 1.04
## as.factor(Southeastern_alr_cat)3 1.00 0.97 1.02
## Intro101 0.99 0.97 1.01
## patient_sexfemale 1.00 0.98 1.02
## as.factor(age_cat)2 1.00 0.98 1.03
## as.factor(age_cat)3 1.00 0.97 1.03
## as.factor(age_cat)4 1.02 0.98 1.06
## TB_RF_smokingyes 0.98 0.96 1.01
## as.factor(cough_cat)2 0.96 0.94 0.99
## as.factor(cough_cat)3 0.96 0.94 0.98
## socio_cat2 1.00 0.98 1.02
## socio_cat3 1.00 0.98 1.03
## TB_categoryrelapse 1.00 0.94 1.08
## general_examination_malnutritionyes 1.01 0.98 1.05
## Educationno 0.98 0.95 1.00
## Educationsecondary 1.00 0.97 1.02
## Educationuniversity 1.02 0.98 1.06
## DR_categoryresistant 0.99 0.95 1.05
library(lmtest)
# cough duration
# for the ancestries as category
model2 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + age_cat + TB_RF_smoking + as.factor(cough_cat) + socio_cat + TB_category + general_examination_malnutrition + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)*Intro10+ as.factor(Eastern_alr_cat)*Intro10 + as.factor(Southeastern_alr_cat)*Intro10 + patient_sex + age_cat + TB_RF_smoking + as.factor(cough_cat) + socio_cat+ TB_category + general_examination_malnutrition + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) *
## Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + patient_sex + age_cat + TB_RF_smoking + as.factor(cough_cat) +
## socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category, family = "gaussian", data = clean,
## na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.2610305 0.0299844 42.056
## as.factor(Western_alr_cat)2 0.0342423 0.0270854 1.264
## as.factor(Western_alr_cat)3 0.0323883 0.0245434 1.320
## as.factor(Western_alr_cat)4 0.0293651 0.0245371 1.197
## Intro101 0.0140718 0.0424874 0.331
## as.factor(Eastern_alr_cat)2 -0.0070836 0.0181736 -0.390
## as.factor(Eastern_alr_cat)3 0.0153460 0.0194231 0.790
## as.factor(Southeastern_alr_cat)2 0.0200084 0.0157656 1.269
## as.factor(Southeastern_alr_cat)3 0.0006993 0.0163213 0.043
## patient_sexfemale -0.0024662 0.0112616 -0.219
## age_cat2 0.0041994 0.0114221 0.368
## age_cat3 0.0010683 0.0141891 0.075
## age_cat4 0.0168342 0.0185634 0.907
## TB_RF_smokingyes -0.0175783 0.0119825 -1.467
## as.factor(cough_cat)2 -0.0399688 0.0133557 -2.993
## as.factor(cough_cat)3 -0.0387184 0.0106419 -3.638
## socio_cat2 0.0013931 0.0103017 0.135
## socio_cat3 0.0064851 0.0147087 0.441
## TB_categoryrelapse -0.0018087 0.0360697 -0.050
## general_examination_malnutritionyes 0.0124321 0.0181803 0.684
## Educationno -0.0252910 0.0145431 -1.739
## Educationsecondary -0.0035739 0.0125610 -0.285
## Educationuniversity 0.0195683 0.0221869 0.882
## DR_categoryresistant -0.0059354 0.0263334 -0.225
## as.factor(Western_alr_cat)2:Intro101 -0.0206197 0.0430957 -0.478
## as.factor(Western_alr_cat)3:Intro101 -0.0144263 0.0390595 -0.369
## as.factor(Western_alr_cat)4:Intro101 -0.0379369 0.0391153 -0.970
## Intro101:as.factor(Eastern_alr_cat)2 0.0224824 0.0325223 0.691
## Intro101:as.factor(Eastern_alr_cat)3 0.0062283 0.0329512 0.189
## Intro101:as.factor(Southeastern_alr_cat)2 -0.0223865 0.0264205 -0.847
## Intro101:as.factor(Southeastern_alr_cat)3 -0.0086008 0.0273516 -0.314
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.factor(Western_alr_cat)2 0.206815
## as.factor(Western_alr_cat)3 0.187641
## as.factor(Western_alr_cat)4 0.232041
## Intro101 0.740651
## as.factor(Eastern_alr_cat)2 0.696891
## as.factor(Eastern_alr_cat)3 0.429900
## as.factor(Southeastern_alr_cat)2 0.205067
## as.factor(Southeastern_alr_cat)3 0.965845
## patient_sexfemale 0.826757
## age_cat2 0.713304
## age_cat3 0.940017
## age_cat4 0.364982
## TB_RF_smokingyes 0.143089
## as.factor(cough_cat)2 0.002921 **
## as.factor(cough_cat)3 0.000307 ***
## socio_cat2 0.892494
## socio_cat3 0.659498
## TB_categoryrelapse 0.960030
## general_examination_malnutritionyes 0.494447
## Educationno 0.082725 .
## Educationsecondary 0.776140
## Educationuniversity 0.378270
## DR_categoryresistant 0.821777
## as.factor(Western_alr_cat)2:Intro101 0.632558
## as.factor(Western_alr_cat)3:Intro101 0.712050
## as.factor(Western_alr_cat)4:Intro101 0.332641
## Intro101:as.factor(Eastern_alr_cat)2 0.489747
## Intro101:as.factor(Eastern_alr_cat)3 0.850167
## Intro101:as.factor(Southeastern_alr_cat)2 0.397278
## Intro101:as.factor(Southeastern_alr_cat)3 0.753325
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009593273)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.2306 on 441 degrees of freedom
## AIC: -821.83
##
## Number of Fisher Scoring iterations: 2
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 3.53 3.33 3.74
## as.factor(Western_alr_cat)2 1.03 0.98 1.09
## as.factor(Western_alr_cat)3 1.03 0.98 1.08
## as.factor(Western_alr_cat)4 1.03 0.98 1.08
## Intro101 1.01 0.93 1.10
## as.factor(Eastern_alr_cat)2 0.99 0.96 1.03
## as.factor(Eastern_alr_cat)3 1.02 0.98 1.05
## as.factor(Southeastern_alr_cat)2 1.02 0.99 1.05
## as.factor(Southeastern_alr_cat)3 1.00 0.97 1.03
## patient_sexfemale 1.00 0.98 1.02
## age_cat2 1.00 0.98 1.03
## age_cat3 1.00 0.97 1.03
## age_cat4 1.02 0.98 1.05
## TB_RF_smokingyes 0.98 0.96 1.01
## as.factor(cough_cat)2 0.96 0.94 0.99
## as.factor(cough_cat)3 0.96 0.94 0.98
## socio_cat2 1.00 0.98 1.02
## socio_cat3 1.01 0.98 1.04
## TB_categoryrelapse 1.00 0.93 1.07
## general_examination_malnutritionyes 1.01 0.98 1.05
## Educationno 0.98 0.95 1.00
## Educationsecondary 1.00 0.97 1.02
## Educationuniversity 1.02 0.98 1.07
## DR_categoryresistant 0.99 0.94 1.05
## as.factor(Western_alr_cat)2:Intro101 0.98 0.90 1.07
## as.factor(Western_alr_cat)3:Intro101 0.99 0.91 1.06
## as.factor(Western_alr_cat)4:Intro101 0.96 0.89 1.04
## Intro101:as.factor(Eastern_alr_cat)2 1.02 0.96 1.09
## Intro101:as.factor(Eastern_alr_cat)3 1.01 0.94 1.07
## Intro101:as.factor(Southeastern_alr_cat)2 0.98 0.93 1.03
## Intro101:as.factor(Southeastern_alr_cat)3 0.99 0.94 1.05
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: log10(Ct_value) ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + patient_sex +
## age_cat + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category
## Model 2: log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + patient_sex +
## age_cat + TB_RF_smoking + as.factor(cough_cat) + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 32 442.91
## 2 25 441.19 -7 3.4368 0.8419
–> no it does not
library(lmtest)
# cough duration
# for the ancestries as category
model2 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)+ as.factor(Eastern_alr_cat) + as.factor(Southeastern_alr_cat) + Intro10 + patient_sex + age_cat + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
model1 <- glm(log10(Ct_value) ~ as.factor(Western_alr_cat)*Intro10+ as.factor(Eastern_alr_cat)*Intro10 + as.factor(Southeastern_alr_cat)*Intro10 + patient_sex + age_cat + TB_RF_smoking + socio_cat+ TB_category + general_examination_malnutrition + Education + DR_category, data = clean, na.action = na.exclude, family = "gaussian")
summary(model1)
##
## Call:
## glm(formula = log10(Ct_value) ~ as.factor(Western_alr_cat) *
## Intro10 + as.factor(Eastern_alr_cat) * Intro10 + as.factor(Southeastern_alr_cat) *
## Intro10 + patient_sex + age_cat + TB_RF_smoking + socio_cat +
## TB_category + general_examination_malnutrition + Education +
## DR_category, family = "gaussian", data = clean, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.2297527 0.0292959 41.977
## as.factor(Western_alr_cat)2 0.0381524 0.0274632 1.389
## as.factor(Western_alr_cat)3 0.0344074 0.0248953 1.382
## as.factor(Western_alr_cat)4 0.0332125 0.0248690 1.335
## Intro101 0.0055354 0.0430309 0.129
## as.factor(Eastern_alr_cat)2 -0.0084508 0.0184258 -0.459
## as.factor(Eastern_alr_cat)3 0.0170052 0.0196426 0.866
## as.factor(Southeastern_alr_cat)2 0.0240648 0.0159530 1.508
## as.factor(Southeastern_alr_cat)3 0.0054801 0.0165073 0.332
## patient_sexfemale -0.0018551 0.0114244 -0.162
## age_cat2 0.0038203 0.0115821 0.330
## age_cat3 0.0001803 0.0143961 0.013
## age_cat4 0.0144029 0.0188256 0.765
## TB_RF_smokingyes -0.0152843 0.0121339 -1.260
## socio_cat2 0.0003932 0.0104457 0.038
## socio_cat3 0.0075365 0.0148911 0.506
## TB_categoryrelapse -0.0044658 0.0364981 -0.122
## general_examination_malnutritionyes 0.0113124 0.0184326 0.614
## Educationno -0.0266781 0.0147495 -1.809
## Educationsecondary -0.0047713 0.0127420 -0.374
## Educationuniversity 0.0207055 0.0225085 0.920
## DR_categoryresistant -0.0094520 0.0267040 -0.354
## as.factor(Western_alr_cat)2:Intro101 -0.0152760 0.0436576 -0.350
## as.factor(Western_alr_cat)3:Intro101 -0.0063865 0.0395777 -0.161
## as.factor(Western_alr_cat)4:Intro101 -0.0331488 0.0396645 -0.836
## Intro101:as.factor(Eastern_alr_cat)2 0.0224089 0.0329516 0.680
## Intro101:as.factor(Eastern_alr_cat)3 0.0053902 0.0333137 0.162
## Intro101:as.factor(Southeastern_alr_cat)2 -0.0239459 0.0267920 -0.894
## Intro101:as.factor(Southeastern_alr_cat)3 -0.0113146 0.0277123 -0.408
## Pr(>|t|)
## (Intercept) <2e-16 ***
## as.factor(Western_alr_cat)2 0.1655
## as.factor(Western_alr_cat)3 0.1676
## as.factor(Western_alr_cat)4 0.1824
## Intro101 0.8977
## as.factor(Eastern_alr_cat)2 0.6467
## as.factor(Eastern_alr_cat)3 0.3871
## as.factor(Southeastern_alr_cat)2 0.1321
## as.factor(Southeastern_alr_cat)3 0.7401
## patient_sexfemale 0.8711
## age_cat2 0.7417
## age_cat3 0.9900
## age_cat4 0.4446
## TB_RF_smokingyes 0.2085
## socio_cat2 0.9700
## socio_cat3 0.6130
## TB_categoryrelapse 0.9027
## general_examination_malnutritionyes 0.5397
## Educationno 0.0712 .
## Educationsecondary 0.7082
## Educationuniversity 0.3581
## DR_categoryresistant 0.7235
## as.factor(Western_alr_cat)2:Intro101 0.7266
## as.factor(Western_alr_cat)3:Intro101 0.8719
## as.factor(Western_alr_cat)4:Intro101 0.4038
## Intro101:as.factor(Eastern_alr_cat)2 0.4968
## Intro101:as.factor(Eastern_alr_cat)3 0.8715
## Intro101:as.factor(Southeastern_alr_cat)2 0.3719
## Intro101:as.factor(Southeastern_alr_cat)3 0.6833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.009877818)
##
## Null deviance: 4.6156 on 471 degrees of freedom
## Residual deviance: 4.3759 on 443 degrees of freedom
## AIC: -809.89
##
## Number of Fisher Scoring iterations: 2
ORs <- exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
round(ORs, digits = 2)
## OR 2.5 % 97.5 %
## (Intercept) 3.42 3.23 3.62
## as.factor(Western_alr_cat)2 1.04 0.98 1.10
## as.factor(Western_alr_cat)3 1.04 0.99 1.09
## as.factor(Western_alr_cat)4 1.03 0.98 1.09
## Intro101 1.01 0.92 1.09
## as.factor(Eastern_alr_cat)2 0.99 0.96 1.03
## as.factor(Eastern_alr_cat)3 1.02 0.98 1.06
## as.factor(Southeastern_alr_cat)2 1.02 0.99 1.06
## as.factor(Southeastern_alr_cat)3 1.01 0.97 1.04
## patient_sexfemale 1.00 0.98 1.02
## age_cat2 1.00 0.98 1.03
## age_cat3 1.00 0.97 1.03
## age_cat4 1.01 0.98 1.05
## TB_RF_smokingyes 0.98 0.96 1.01
## socio_cat2 1.00 0.98 1.02
## socio_cat3 1.01 0.98 1.04
## TB_categoryrelapse 1.00 0.93 1.07
## general_examination_malnutritionyes 1.01 0.98 1.05
## Educationno 0.97 0.95 1.00
## Educationsecondary 1.00 0.97 1.02
## Educationuniversity 1.02 0.98 1.07
## DR_categoryresistant 0.99 0.94 1.04
## as.factor(Western_alr_cat)2:Intro101 0.98 0.90 1.07
## as.factor(Western_alr_cat)3:Intro101 0.99 0.92 1.07
## as.factor(Western_alr_cat)4:Intro101 0.97 0.90 1.05
## Intro101:as.factor(Eastern_alr_cat)2 1.02 0.96 1.09
## Intro101:as.factor(Eastern_alr_cat)3 1.01 0.94 1.07
## Intro101:as.factor(Southeastern_alr_cat)2 0.98 0.93 1.03
## Intro101:as.factor(Southeastern_alr_cat)3 0.99 0.94 1.04
lrtest(model1, model2)
## Likelihood ratio test
##
## Model 1: log10(Ct_value) ~ as.factor(Western_alr_cat) * Intro10 + as.factor(Eastern_alr_cat) *
## Intro10 + as.factor(Southeastern_alr_cat) * Intro10 + patient_sex +
## age_cat + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## Model 2: log10(Ct_value) ~ as.factor(Western_alr_cat) + as.factor(Eastern_alr_cat) +
## as.factor(Southeastern_alr_cat) + Intro10 + patient_sex +
## age_cat + TB_RF_smoking + socio_cat + TB_category + general_examination_malnutrition +
## Education + DR_category
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 30 434.95
## 2 23 433.19 -7 3.5211 0.833